about summary refs log tree commit diff
path: root/compiler/rustc_codegen_llvm/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--compiler/rustc_codegen_llvm/src/abi.rs67
-rw-r--r--compiler/rustc_codegen_llvm/src/allocator.rs4
-rw-r--r--compiler/rustc_codegen_llvm/src/attributes.rs27
-rw-r--r--compiler/rustc_codegen_llvm/src/back/archive.rs11
-rw-r--r--compiler/rustc_codegen_llvm/src/back/lto.rs45
-rw-r--r--compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/back/write.rs262
-rw-r--r--compiler/rustc_codegen_llvm/src/base.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/builder.rs176
-rw-r--r--compiler/rustc_codegen_llvm/src/builder/autodiff.rs111
-rw-r--r--compiler/rustc_codegen_llvm/src/common.rs22
-rw-r--r--compiler/rustc_codegen_llvm/src/consts.rs77
-rw-r--r--compiler/rustc_codegen_llvm/src/context.rs81
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs57
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs29
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs71
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs37
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs88
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs46
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs6
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/mod.rs122
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/namespace.rs8
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/utils.rs2
-rw-r--r--compiler/rustc_codegen_llvm/src/declare.rs32
-rw-r--r--compiler/rustc_codegen_llvm/src/errors.rs7
-rw-r--r--compiler/rustc_codegen_llvm/src/intrinsic.rs626
-rw-r--r--compiler/rustc_codegen_llvm/src/lib.rs17
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs9
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/ffi.rs281
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm/mod.rs8
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs202
-rw-r--r--compiler/rustc_codegen_llvm/src/mono_item.rs5
-rw-r--r--compiler/rustc_codegen_llvm/src/type_.rs21
35 files changed, 1499 insertions, 1070 deletions
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 1d35138b013..13846c8ab4b 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -1,10 +1,10 @@
+use std::borrow::Borrow;
 use std::cmp;
 
 use libc::c_uint;
 use rustc_abi as abi;
 pub(crate) use rustc_abi::ExternAbi;
-use rustc_abi::Primitive::Int;
-use rustc_abi::{HasDataLayout, Size};
+use rustc_abi::{HasDataLayout, Primitive, Reg, RegKind, Size};
 use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue};
@@ -312,7 +312,7 @@ impl<'ll, 'tcx> ArgAbiBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
 pub(crate) trait FnAbiLlvmExt<'ll, 'tcx> {
     fn llvm_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type;
     fn ptr_to_llvm_type(&self, cx: &CodegenCx<'ll, 'tcx>) -> &'ll Type;
-    fn llvm_cconv(&self) -> llvm::CallConv;
+    fn llvm_cconv(&self, cx: &CodegenCx<'ll, 'tcx>) -> llvm::CallConv;
 
     /// Apply attributes to a function declaration/definition.
     fn apply_attrs_llfn(
@@ -404,8 +404,8 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         cx.type_ptr_ext(cx.data_layout().instruction_address_space)
     }
 
-    fn llvm_cconv(&self) -> llvm::CallConv {
-        self.conv.into()
+    fn llvm_cconv(&self, cx: &CodegenCx<'ll, 'tcx>) -> llvm::CallConv {
+        llvm::CallConv::from_conv(self.conv, cx.tcx.sess.target.arch.borrow())
     }
 
     fn apply_attrs_llfn(
@@ -439,7 +439,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         let apply_range_attr = |idx: AttributePlace, scalar: rustc_abi::Scalar| {
             if cx.sess().opts.optimize != config::OptLevel::No
                 && llvm_util::get_version() >= (19, 0, 0)
-                && matches!(scalar.primitive(), Int(..))
+                && matches!(scalar.primitive(), Primitive::Int(..))
                 // If the value is a boolean, the range is 0..2 and that ultimately
                 // become 0..0 when the type becomes i1, which would be rejected
                 // by the LLVM verifier.
@@ -447,11 +447,11 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 // LLVM also rejects full range.
                 && !scalar.is_always_valid(cx)
             {
-                attributes::apply_to_llfn(llfn, idx, &[llvm::CreateRangeAttr(
-                    cx.llcx,
-                    scalar.size(cx),
-                    scalar.valid_range(cx),
-                )]);
+                attributes::apply_to_llfn(
+                    llfn,
+                    idx,
+                    &[llvm::CreateRangeAttr(cx.llcx, scalar.size(cx), scalar.valid_range(cx))],
+                );
             }
         };
 
@@ -471,10 +471,14 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 );
                 attributes::apply_to_llfn(llfn, llvm::AttributePlace::Argument(i), &[sret]);
                 if cx.sess().opts.optimize != config::OptLevel::No {
-                    attributes::apply_to_llfn(llfn, llvm::AttributePlace::Argument(i), &[
-                        llvm::AttributeKind::Writable.create_attr(cx.llcx),
-                        llvm::AttributeKind::DeadOnUnwind.create_attr(cx.llcx),
-                    ]);
+                    attributes::apply_to_llfn(
+                        llfn,
+                        llvm::AttributePlace::Argument(i),
+                        &[
+                            llvm::AttributeKind::Writable.create_attr(cx.llcx),
+                            llvm::AttributeKind::DeadOnUnwind.create_attr(cx.llcx),
+                        ],
+                    );
                 }
             }
             PassMode::Cast { cast, pad_i32: _ } => {
@@ -573,7 +577,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         if bx.cx.sess().opts.optimize != config::OptLevel::No
                 && llvm_util::get_version() < (19, 0, 0)
                 && let abi::BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr
-                && matches!(scalar.primitive(), Int(..))
+                && matches!(scalar.primitive(), Primitive::Int(..))
                 // If the value is a boolean, the range is 0..2 and that ultimately
                 // become 0..0 when the type becomes i1, which would be rejected
                 // by the LLVM verifier.
@@ -592,9 +596,11 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                         bx.cx.llcx,
                         bx.cx.type_array(bx.cx.type_i8(), arg.layout.size.bytes()),
                     );
-                    attributes::apply_to_callsite(callsite, llvm::AttributePlace::Argument(i), &[
-                        byval,
-                    ]);
+                    attributes::apply_to_callsite(
+                        callsite,
+                        llvm::AttributePlace::Argument(i),
+                        &[byval],
+                    );
                 }
                 PassMode::Direct(attrs)
                 | PassMode::Indirect { attrs, meta_attrs: None, on_stack: false } => {
@@ -617,7 +623,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
             }
         }
 
-        let cconv = self.llvm_cconv();
+        let cconv = self.llvm_cconv(&bx.cx);
         if cconv != llvm::CCallConv {
             llvm::SetInstructionCallConv(callsite, cconv);
         }
@@ -626,9 +632,11 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
             // This will probably get ignored on all targets but those supporting the TrustZone-M
             // extension (thumbv8m targets).
             let cmse_nonsecure_call = llvm::CreateAttrString(bx.cx.llcx, "cmse_nonsecure_call");
-            attributes::apply_to_callsite(callsite, llvm::AttributePlace::Function, &[
-                cmse_nonsecure_call,
-            ]);
+            attributes::apply_to_callsite(
+                callsite,
+                llvm::AttributePlace::Function,
+                &[cmse_nonsecure_call],
+            );
         }
 
         // Some intrinsics require that an elementtype attribute (with the pointee type of a
@@ -655,8 +663,8 @@ impl<'tcx> AbiBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
     }
 }
 
-impl From<Conv> for llvm::CallConv {
-    fn from(conv: Conv) -> Self {
+impl llvm::CallConv {
+    pub(crate) fn from_conv(conv: Conv, arch: &str) -> Self {
         match conv {
             Conv::C
             | Conv::Rust
@@ -666,6 +674,15 @@ impl From<Conv> for llvm::CallConv {
             Conv::Cold => llvm::ColdCallConv,
             Conv::PreserveMost => llvm::PreserveMost,
             Conv::PreserveAll => llvm::PreserveAll,
+            Conv::GpuKernel => {
+                if arch == "amdgpu" {
+                    llvm::AmdgpuKernel
+                } else if arch == "nvptx64" {
+                    llvm::PtxKernel
+                } else {
+                    panic!("Architecture {arch} does not support GpuKernel calling convention");
+                }
+            }
             Conv::AvrInterrupt => llvm::AvrInterrupt,
             Conv::AvrNonBlockingInterrupt => llvm::AvrNonBlockingInterrupt,
             Conv::ArmAapcs => llvm::ArmAapcsCallConv,
diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs
index 149ded28356..66723cbf882 100644
--- a/compiler/rustc_codegen_llvm/src/allocator.rs
+++ b/compiler/rustc_codegen_llvm/src/allocator.rs
@@ -81,13 +81,13 @@ pub(crate) unsafe fn codegen(
         llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
         let val = tcx.sess.opts.unstable_opts.oom.should_panic();
         let llval = llvm::LLVMConstInt(i8, val as u64, False);
-        llvm::LLVMSetInitializer(ll_g, llval);
+        llvm::set_initializer(ll_g, llval);
 
         let name = NO_ALLOC_SHIM_IS_UNSTABLE;
         let ll_g = llvm::LLVMRustGetOrInsertGlobal(llmod, name.as_c_char_ptr(), name.len(), i8);
         llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
         let llval = llvm::LLVMConstInt(i8, 0, False);
-        llvm::LLVMSetInitializer(ll_g, llval);
+        llvm::set_initializer(ll_g, llval);
     }
 
     if tcx.sess.opts.debuginfo != DebugInfo::None {
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index f8454fd9960..3d7afa17bdf 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -37,7 +37,9 @@ fn inline_attr<'ll>(cx: &CodegenCx<'ll, '_>, inline: InlineAttr) -> Option<&'ll
     }
     match inline {
         InlineAttr::Hint => Some(AttributeKind::InlineHint.create_attr(cx.llcx)),
-        InlineAttr::Always => Some(AttributeKind::AlwaysInline.create_attr(cx.llcx)),
+        InlineAttr::Always | InlineAttr::Force { .. } => {
+            Some(AttributeKind::AlwaysInline.create_attr(cx.llcx))
+        }
         InlineAttr::Never => {
             if cx.sess().target.arch != "amdgpu" {
                 Some(AttributeKind::NoInline.create_attr(cx.llcx))
@@ -331,9 +333,12 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
     let mut to_add = SmallVec::<[_; 16]>::new();
 
     match codegen_fn_attrs.optimize {
-        OptimizeAttr::None => {
+        OptimizeAttr::Default => {
             to_add.extend(default_optimisation_attrs(cx));
         }
+        OptimizeAttr::DoNotOptimize => {
+            to_add.push(llvm::AttributeKind::OptimizeNone.create_attr(cx.llcx));
+        }
         OptimizeAttr::Size => {
             to_add.push(llvm::AttributeKind::MinSize.create_attr(cx.llcx));
             to_add.push(llvm::AttributeKind::OptimizeForSize.create_attr(cx.llcx));
@@ -341,12 +346,12 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         OptimizeAttr::Speed => {}
     }
 
-    let inline =
-        if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
-            InlineAttr::Hint
-        } else {
-            codegen_fn_attrs.inline
-        };
+    // `optnone` requires `noinline`
+    let inline = match (codegen_fn_attrs.inline, &codegen_fn_attrs.optimize) {
+        (_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never,
+        (InlineAttr::None, _) if instance.def.requires_inline(cx.tcx) => InlineAttr::Hint,
+        (inline, _) => inline,
+    };
     to_add.extend(inline_attr(cx, inline));
 
     // The `uwtable` attribute according to LLVM is:
@@ -472,7 +477,11 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         let allocated_pointer = AttributeKind::AllocatedPointer.create_attr(cx.llcx);
         attributes::apply_to_llfn(llfn, AttributePlace::Argument(0), &[allocated_pointer]);
     }
-    if let Some(align) = codegen_fn_attrs.alignment {
+    // function alignment can be set globally with the `-Zmin-function-alignment=<n>` flag;
+    // the alignment from a `#[repr(align(<n>))]` is used if it specifies a higher alignment.
+    if let Some(align) =
+        Ord::max(cx.tcx.sess.opts.unstable_opts.min_function_alignment, codegen_fn_attrs.alignment)
+    {
         llvm::set_alignment(llfn, align);
     }
     if let Some(backchain) = backchain_attr(cx) {
diff --git a/compiler/rustc_codegen_llvm/src/back/archive.rs b/compiler/rustc_codegen_llvm/src/back/archive.rs
index 33a956e552f..93553f3f364 100644
--- a/compiler/rustc_codegen_llvm/src/back/archive.rs
+++ b/compiler/rustc_codegen_llvm/src/back/archive.rs
@@ -11,7 +11,7 @@ use rustc_codegen_ssa::back::archive::{
 use rustc_session::Session;
 
 use crate::llvm::archive_ro::{ArchiveRO, Child};
-use crate::llvm::{self, ArchiveKind};
+use crate::llvm::{self, ArchiveKind, last_error};
 
 /// Helper for adding many files to an archive.
 #[must_use = "must call build() to finish building the archive"]
@@ -169,6 +169,8 @@ impl<'a> LlvmArchiveBuilder<'a> {
             .unwrap_or_else(|kind| self.sess.dcx().emit_fatal(UnknownArchiveKind { kind }));
 
         let mut additions = mem::take(&mut self.additions);
+        // Values in the `members` list below will contain pointers to the strings allocated here.
+        // So they need to get dropped after all elements of `members` get freed.
         let mut strings = Vec::new();
         let mut members = Vec::new();
 
@@ -229,12 +231,7 @@ impl<'a> LlvmArchiveBuilder<'a> {
                 self.sess.target.arch == "arm64ec",
             );
             let ret = if r.into_result().is_err() {
-                let err = llvm::LLVMRustGetLastError();
-                let msg = if err.is_null() {
-                    "failed to write archive".into()
-                } else {
-                    String::from_utf8_lossy(CStr::from_ptr(err).to_bytes())
-                };
+                let msg = last_error().unwrap_or_else(|| "failed to write archive".into());
                 Err(io::Error::new(io::ErrorKind::Other, msg))
             } else {
                 Ok(!members.is_empty())
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 4adf99e91d0..7262fce4911 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -1,7 +1,6 @@
 use std::collections::BTreeMap;
 use std::ffi::{CStr, CString};
 use std::fs::File;
-use std::mem::ManuallyDrop;
 use std::path::Path;
 use std::sync::Arc;
 use std::{io, iter, slice};
@@ -9,7 +8,7 @@ use std::{io, iter, slice};
 use object::read::archive::ArchiveFile;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::symbol_export;
-use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, TargetMachineFactoryConfig};
+use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
 use rustc_data_structures::fx::FxHashMap;
@@ -607,10 +606,31 @@ pub(crate) fn run_pass_manager(
 
     // If this rustc version was build with enzyme/autodiff enabled, and if users applied the
     // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
-    let first_run = true;
     debug!("running llvm pm opt pipeline");
     unsafe {
-        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?;
+        write::llvm_optimize(
+            cgcx,
+            dcx,
+            module,
+            config,
+            opt_level,
+            opt_stage,
+            write::AutodiffStage::DuringAD,
+        )?;
+    }
+    // FIXME(ZuseZ4): Make this more granular
+    if cfg!(llvm_enzyme) && !thin {
+        unsafe {
+            write::llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                llvm::OptStage::FatLTO,
+                write::AutodiffStage::PostAD,
+            )?;
+        }
     }
     debug!("lto done");
     Ok(())
@@ -622,7 +642,7 @@ unsafe impl Send for ModuleBuffer {}
 unsafe impl Sync for ModuleBuffer {}
 
 impl ModuleBuffer {
-    pub fn new(m: &llvm::Module) -> ModuleBuffer {
+    pub(crate) fn new(m: &llvm::Module) -> ModuleBuffer {
         ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
     }
 }
@@ -664,7 +684,7 @@ unsafe impl Send for ThinBuffer {}
 unsafe impl Sync for ThinBuffer {}
 
 impl ThinBuffer {
-    pub fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer {
+    pub(crate) fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer {
         unsafe {
             let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin, emit_summary);
             ThinBuffer(buffer)
@@ -706,18 +726,15 @@ pub(crate) unsafe fn optimize_thin_module(
     let dcx = dcx.handle();
 
     let module_name = &thin_module.shared.module_names[thin_module.idx];
-    let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap());
-    let tm = (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(dcx, e))?;
 
     // Right now the implementation we've got only works over serialized
     // modules, so we create a fresh new LLVM context and parse the module
     // into that context. One day, however, we may do this for upstream
     // crates but for locally codegened modules we may be able to reuse
     // that LLVM Context and Module.
-    let llcx = unsafe { llvm::LLVMRustContextCreate(cgcx.fewer_names) };
-    let llmod_raw = parse_module(llcx, module_name, thin_module.data(), dcx)? as *const _;
+    let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx)?;
     let mut module = ModuleCodegen {
-        module_llvm: ModuleLlvm { llmod_raw, llcx, tm: ManuallyDrop::new(tm) },
+        module_llvm,
         name: thin_module.name().to_string(),
         kind: ModuleKind::Regular,
     };
@@ -737,11 +754,7 @@ pub(crate) unsafe fn optimize_thin_module(
         {
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
-            if unsafe {
-                !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target)
-            } {
-                return Err(write::llvm_err(dcx, LlvmError::PrepareThinLtoModule));
-            }
+            unsafe { llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) };
             save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
         }
 
diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
index 4cbd49aa44d..f075f332462 100644
--- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
+++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
@@ -17,7 +17,7 @@ pub struct OwnedTargetMachine {
 }
 
 impl OwnedTargetMachine {
-    pub fn new(
+    pub(crate) fn new(
         triple: &CStr,
         cpu: &CStr,
         features: &CStr,
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 509b24dd703..58933a77e53 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -40,7 +40,7 @@ use crate::errors::{
     WithLlvmError, WriteBytecode,
 };
 use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
-use crate::llvm::{self, DiagnosticInfo, PassManager};
+use crate::llvm::{self, DiagnosticInfo};
 use crate::type_::Type;
 use crate::{LlvmCodegenBackend, ModuleLlvm, base, common, llvm_util};
 
@@ -54,7 +54,7 @@ pub(crate) fn llvm_err<'a>(dcx: DiagCtxtHandle<'_>, err: LlvmError<'a>) -> Fatal
 fn write_output_file<'ll>(
     dcx: DiagCtxtHandle<'_>,
     target: &'ll llvm::TargetMachine,
-    pm: &llvm::PassManager<'ll>,
+    no_builtins: bool,
     m: &'ll llvm::Module,
     output: &Path,
     dwo_output: Option<&Path>,
@@ -63,16 +63,19 @@ fn write_output_file<'ll>(
     verify_llvm_ir: bool,
 ) -> Result<(), FatalError> {
     debug!("write_output_file output={:?} dwo_output={:?}", output, dwo_output);
-    unsafe {
-        let output_c = path_to_c_string(output);
-        let dwo_output_c;
-        let dwo_output_ptr = if let Some(dwo_output) = dwo_output {
-            dwo_output_c = path_to_c_string(dwo_output);
-            dwo_output_c.as_ptr()
-        } else {
-            std::ptr::null()
-        };
-        let result = llvm::LLVMRustWriteOutputFile(
+    let output_c = path_to_c_string(output);
+    let dwo_output_c;
+    let dwo_output_ptr = if let Some(dwo_output) = dwo_output {
+        dwo_output_c = path_to_c_string(dwo_output);
+        dwo_output_c.as_ptr()
+    } else {
+        std::ptr::null()
+    };
+    let result = unsafe {
+        let pm = llvm::LLVMCreatePassManager();
+        llvm::LLVMAddAnalysisPasses(target, pm);
+        llvm::LLVMRustAddLibraryInfo(pm, m, no_builtins);
+        llvm::LLVMRustWriteOutputFile(
             target,
             pm,
             m,
@@ -80,22 +83,22 @@ fn write_output_file<'ll>(
             dwo_output_ptr,
             file_type,
             verify_llvm_ir,
-        );
+        )
+    };
 
-        // Record artifact sizes for self-profiling
-        if result == llvm::LLVMRustResult::Success {
-            let artifact_kind = match file_type {
-                llvm::FileType::ObjectFile => "object_file",
-                llvm::FileType::AssemblyFile => "assembly_file",
-            };
-            record_artifact_size(self_profiler_ref, artifact_kind, output);
-            if let Some(dwo_file) = dwo_output {
-                record_artifact_size(self_profiler_ref, "dwo_file", dwo_file);
-            }
+    // Record artifact sizes for self-profiling
+    if result == llvm::LLVMRustResult::Success {
+        let artifact_kind = match file_type {
+            llvm::FileType::ObjectFile => "object_file",
+            llvm::FileType::AssemblyFile => "assembly_file",
+        };
+        record_artifact_size(self_profiler_ref, artifact_kind, output);
+        if let Some(dwo_file) = dwo_output {
+            record_artifact_size(self_profiler_ref, "dwo_file", dwo_file);
         }
-
-        result.into_result().map_err(|()| llvm_err(dcx, LlvmError::WriteOutput { path: output }))
     }
+
+    result.into_result().map_err(|()| llvm_err(dcx, LlvmError::WriteOutput { path: output }))
 }
 
 pub(crate) fn create_informational_target_machine(
@@ -325,13 +328,17 @@ pub(crate) fn save_temp_bitcode(
     if !cgcx.save_temps {
         return;
     }
+    let ext = format!("{name}.bc");
+    let cgu = Some(&module.name[..]);
+    let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
+    write_bitcode_to_file(module, &path)
+}
+
+fn write_bitcode_to_file(module: &ModuleCodegen<ModuleLlvm>, path: &Path) {
     unsafe {
-        let ext = format!("{name}.bc");
-        let cgu = Some(&module.name[..]);
-        let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
-        let cstr = path_to_c_string(&path);
+        let path = path_to_c_string(&path);
         let llmod = module.module_llvm.llmod();
-        llvm::LLVMWriteBitcodeToFile(llmod, cstr.as_ptr());
+        llvm::LLVMWriteBitcodeToFile(llmod, path.as_ptr());
     }
 }
 
@@ -530,6 +537,16 @@ fn get_instr_profile_output_path(config: &ModuleConfig) -> Option<CString> {
     config.instrument_coverage.then(|| c"default_%m_%p.profraw".to_owned())
 }
 
+// PreAD will run llvm opts but disable size increasing opts (vectorization, loop unrolling)
+// DuringAD is the same as above, but also runs the enzyme opt and autodiff passes.
+// PostAD will run all opts, including size increasing opts.
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum AutodiffStage {
+    PreAD,
+    DuringAD,
+    PostAD,
+}
+
 pub(crate) unsafe fn llvm_optimize(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
@@ -537,7 +554,7 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
-    skip_size_increasing_opts: bool,
+    autodiff_stage: AutodiffStage,
 ) -> Result<(), FatalError> {
     // Enzyme:
     // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized
@@ -550,12 +567,16 @@ pub(crate) unsafe fn llvm_optimize(
     let unroll_loops;
     let vectorize_slp;
     let vectorize_loop;
+    let run_enzyme = cfg!(llvm_enzyme) && autodiff_stage == AutodiffStage::DuringAD;
 
     // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
-    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt).
+    // We therefore have two calls to llvm_optimize, if autodiff is used.
+    //
+    // FIXME(ZuseZ4): Before shipping on nightly,
     // we should make this more granular, or at least check that the user has at least one autodiff
     // call in their code, to justify altering the compilation pipeline.
-    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+    if cfg!(llvm_enzyme) && autodiff_stage != AutodiffStage::PostAD {
         unroll_loops = false;
         vectorize_slp = false;
         vectorize_loop = false;
@@ -565,7 +586,7 @@ pub(crate) unsafe fn llvm_optimize(
         vectorize_slp = config.vectorize_slp;
         vectorize_loop = config.vectorize_loop;
     }
-    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
@@ -633,6 +654,7 @@ pub(crate) unsafe fn llvm_optimize(
             vectorize_loop,
             config.no_builtins,
             config.emit_lifetime_markers,
+            run_enzyme,
             sanitizer_options.as_ref(),
             pgo_gen_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
             pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
@@ -661,7 +683,6 @@ pub(crate) unsafe fn optimize(
 ) -> Result<(), FatalError> {
     let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
 
-    let llmod = module.module_llvm.llmod();
     let llcx = &*module.module_llvm.llcx;
     let _handlers = DiagnosticHandlers::new(cgcx, dcx, llcx, module, CodegenDiagnosticsStage::Opt);
 
@@ -670,8 +691,7 @@ pub(crate) unsafe fn optimize(
 
     if config.emit_no_opt_bc {
         let out = cgcx.output_filenames.temp_path_ext("no-opt.bc", module_name);
-        let out = path_to_c_string(&out);
-        unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
+        write_bitcode_to_file(module, &out)
     }
 
     // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
@@ -684,18 +704,14 @@ pub(crate) unsafe fn optimize(
             _ => llvm::OptStage::PreLinkNoLTO,
         };
 
-        // If we know that we will later run AD, then we disable vectorization and loop unrolling
-        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling.
+        // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
+        // FIXME(ZuseZ4): Make this more granular, only set PreAD if we actually have autodiff
+        // usages, not just if we build rustc with autodiff support.
+        let autodiff_stage =
+            if cfg!(llvm_enzyme) { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
         return unsafe {
-            llvm_optimize(
-                cgcx,
-                dcx,
-                module,
-                config,
-                opt_level,
-                opt_stage,
-                skip_size_increasing_opts,
-            )
+            llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
         };
     }
     Ok(())
@@ -744,37 +760,9 @@ pub(crate) unsafe fn codegen(
             create_msvc_imps(cgcx, llcx, llmod);
         }
 
-        // A codegen-specific pass manager is used to generate object
-        // files for an LLVM module.
-        //
-        // Apparently each of these pass managers is a one-shot kind of
-        // thing, so we create a new one for each type of output. The
-        // pass manager passed to the closure should be ensured to not
-        // escape the closure itself, and the manager should only be
-        // used once.
-        unsafe fn with_codegen<'ll, F, R>(
-            tm: &'ll llvm::TargetMachine,
-            llmod: &'ll llvm::Module,
-            no_builtins: bool,
-            f: F,
-        ) -> R
-        where
-            F: FnOnce(&'ll mut PassManager<'ll>) -> R,
-        {
-            unsafe {
-                let cpm = llvm::LLVMCreatePassManager();
-                llvm::LLVMAddAnalysisPasses(tm, cpm);
-                llvm::LLVMRustAddLibraryInfo(cpm, llmod, no_builtins);
-                f(cpm)
-            }
-        }
-
-        // Two things to note:
-        // - If object files are just LLVM bitcode we write bitcode, copy it to
-        //   the .o file, and delete the bitcode if it wasn't otherwise
-        //   requested.
-        // - If we don't have the integrated assembler then we need to emit
-        //   asm from LLVM and use `gcc` to create the object file.
+        // Note that if object files are just LLVM bitcode we write bitcode,
+        // copy it to the .o file, and delete the bitcode if it wasn't
+        // otherwise requested.
 
         let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
         let bc_summary_out =
@@ -890,21 +878,17 @@ pub(crate) unsafe fn codegen(
             } else {
                 llmod
             };
-            unsafe {
-                with_codegen(tm, llmod, config.no_builtins, |cpm| {
-                    write_output_file(
-                        dcx,
-                        tm,
-                        cpm,
-                        llmod,
-                        &path,
-                        None,
-                        llvm::FileType::AssemblyFile,
-                        &cgcx.prof,
-                        config.verify_llvm_ir,
-                    )
-                })?;
-            }
+            write_output_file(
+                dcx,
+                tm,
+                config.no_builtins,
+                llmod,
+                &path,
+                None,
+                llvm::FileType::AssemblyFile,
+                &cgcx.prof,
+                config.verify_llvm_ir,
+            )?;
         }
 
         match config.emit_obj {
@@ -928,21 +912,17 @@ pub(crate) unsafe fn codegen(
                     (_, SplitDwarfKind::Split) => Some(dwo_out.as_path()),
                 };
 
-                unsafe {
-                    with_codegen(tm, llmod, config.no_builtins, |cpm| {
-                        write_output_file(
-                            dcx,
-                            tm,
-                            cpm,
-                            llmod,
-                            &obj_out,
-                            dwo_out,
-                            llvm::FileType::ObjectFile,
-                            &cgcx.prof,
-                            config.verify_llvm_ir,
-                        )
-                    })?;
-                }
+                write_output_file(
+                    dcx,
+                    tm,
+                    config.no_builtins,
+                    llmod,
+                    &obj_out,
+                    dwo_out,
+                    llvm::FileType::ObjectFile,
+                    &cgcx.prof,
+                    config.verify_llvm_ir,
+                )?;
             }
 
             EmitObj::Bitcode => {
@@ -1069,24 +1049,18 @@ unsafe fn embed_bitcode(
         {
             // We don't need custom section flags, create LLVM globals.
             let llconst = common::bytes_in_context(llcx, bitcode);
-            let llglobal = llvm::LLVMAddGlobal(
-                llmod,
-                common::val_ty(llconst),
-                c"rustc.embedded.module".as_ptr(),
-            );
-            llvm::LLVMSetInitializer(llglobal, llconst);
+            let llglobal =
+                llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.module");
+            llvm::set_initializer(llglobal, llconst);
 
             llvm::set_section(llglobal, bitcode_section_name(cgcx));
             llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage);
             llvm::LLVMSetGlobalConstant(llglobal, llvm::True);
 
             let llconst = common::bytes_in_context(llcx, cmdline.as_bytes());
-            let llglobal = llvm::LLVMAddGlobal(
-                llmod,
-                common::val_ty(llconst),
-                c"rustc.embedded.cmdline".as_ptr(),
-            );
-            llvm::LLVMSetInitializer(llglobal, llconst);
+            let llglobal =
+                llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.cmdline");
+            llvm::set_initializer(llglobal, llconst);
             let section = if cgcx.target_is_like_osx {
                 c"__LLVM,__cmdline"
             } else if cgcx.target_is_like_aix {
@@ -1126,31 +1100,29 @@ fn create_msvc_imps(
     // underscores added in front).
     let prefix = if cgcx.target_arch == "x86" { "\x01__imp__" } else { "\x01__imp_" };
 
-    unsafe {
-        let ptr_ty = Type::ptr_llcx(llcx);
-        let globals = base::iter_globals(llmod)
-            .filter(|&val| {
-                llvm::get_linkage(val) == llvm::Linkage::ExternalLinkage
-                    && llvm::LLVMIsDeclaration(val) == 0
-            })
-            .filter_map(|val| {
-                // Exclude some symbols that we know are not Rust symbols.
-                let name = llvm::get_value_name(val);
-                if ignored(name) { None } else { Some((val, name)) }
-            })
-            .map(move |(val, name)| {
-                let mut imp_name = prefix.as_bytes().to_vec();
-                imp_name.extend(name);
-                let imp_name = CString::new(imp_name).unwrap();
-                (imp_name, val)
-            })
-            .collect::<Vec<_>>();
+    let ptr_ty = Type::ptr_llcx(llcx);
+    let globals = base::iter_globals(llmod)
+        .filter(|&val| {
+            llvm::get_linkage(val) == llvm::Linkage::ExternalLinkage && !llvm::is_declaration(val)
+        })
+        .filter_map(|val| {
+            // Exclude some symbols that we know are not Rust symbols.
+            let name = llvm::get_value_name(val);
+            if ignored(name) { None } else { Some((val, name)) }
+        })
+        .map(move |(val, name)| {
+            let mut imp_name = prefix.as_bytes().to_vec();
+            imp_name.extend(name);
+            let imp_name = CString::new(imp_name).unwrap();
+            (imp_name, val)
+        })
+        .collect::<Vec<_>>();
 
-        for (imp_name, val) in globals {
-            let imp = llvm::LLVMAddGlobal(llmod, ptr_ty, imp_name.as_ptr());
-            llvm::LLVMSetInitializer(imp, val);
-            llvm::set_linkage(imp, llvm::Linkage::ExternalLinkage);
-        }
+    for (imp_name, val) in globals {
+        let imp = llvm::add_global(llmod, ptr_ty, &imp_name);
+
+        llvm::set_initializer(imp, val);
+        llvm::set_linkage(imp, llvm::Linkage::ExternalLinkage);
     }
 
     // Use this function to exclude certain symbols from `__imp` generation.
diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs
index d05faf5577b..d35c7945bae 100644
--- a/compiler/rustc_codegen_llvm/src/base.rs
+++ b/compiler/rustc_codegen_llvm/src/base.rs
@@ -157,9 +157,7 @@ pub(crate) fn linkage_to_llvm(linkage: Linkage) -> llvm::Linkage {
         Linkage::LinkOnceODR => llvm::Linkage::LinkOnceODRLinkage,
         Linkage::WeakAny => llvm::Linkage::WeakAnyLinkage,
         Linkage::WeakODR => llvm::Linkage::WeakODRLinkage,
-        Linkage::Appending => llvm::Linkage::AppendingLinkage,
         Linkage::Internal => llvm::Linkage::InternalLinkage,
-        Linkage::Private => llvm::Linkage::PrivateLinkage,
         Linkage::ExternalWeak => llvm::Linkage::ExternalWeakLinkage,
         Linkage::Common => llvm::Linkage::CommonLinkage,
     }
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index fbeedef314d..acae0b444c0 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1,4 +1,4 @@
-use std::borrow::Cow;
+use std::borrow::{Borrow, Cow};
 use std::ops::Deref;
 use std::{iter, ptr};
 
@@ -31,20 +31,22 @@ use tracing::{debug, instrument};
 use crate::abi::FnAbiLlvmExt;
 use crate::attributes;
 use crate::common::Funclet;
-use crate::context::CodegenCx;
+use crate::context::{CodegenCx, SimpleCx};
 use crate::llvm::{self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, True};
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
 
-// All Builders must have an llfn associated with them
 #[must_use]
-pub(crate) struct Builder<'a, 'll, 'tcx> {
+pub(crate) struct GenericBuilder<'a, 'll, CX: Borrow<SimpleCx<'ll>>> {
     pub llbuilder: &'ll mut llvm::Builder<'ll>,
-    pub cx: &'a CodegenCx<'ll, 'tcx>,
+    pub cx: &'a CX,
 }
 
-impl Drop for Builder<'_, '_, '_> {
+pub(crate) type SBuilder<'a, 'll> = GenericBuilder<'a, 'll, SimpleCx<'ll>>;
+pub(crate) type Builder<'a, 'll, 'tcx> = GenericBuilder<'a, 'll, CodegenCx<'ll, 'tcx>>;
+
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> Drop for GenericBuilder<'a, 'll, CX> {
     fn drop(&mut self) {
         unsafe {
             llvm::LLVMDisposeBuilder(&mut *(self.llbuilder as *mut _));
@@ -52,6 +54,112 @@ impl Drop for Builder<'_, '_, '_> {
     }
 }
 
+impl<'a, 'll> SBuilder<'a, 'll> {
+    fn call(
+        &mut self,
+        llty: &'ll Type,
+        llfn: &'ll Value,
+        args: &[&'ll Value],
+        funclet: Option<&Funclet<'ll>>,
+    ) -> &'ll Value {
+        debug!("call {:?} with args ({:?})", llfn, args);
+
+        let args = self.check_call("call", llty, llfn, args);
+        let funclet_bundle = funclet.map(|funclet| funclet.bundle());
+        let mut bundles: SmallVec<[_; 2]> = SmallVec::new();
+        if let Some(funclet_bundle) = funclet_bundle {
+            bundles.push(funclet_bundle);
+        }
+
+        let call = unsafe {
+            llvm::LLVMBuildCallWithOperandBundles(
+                self.llbuilder,
+                llty,
+                llfn,
+                args.as_ptr() as *const &llvm::Value,
+                args.len() as c_uint,
+                bundles.as_ptr(),
+                bundles.len() as c_uint,
+                c"".as_ptr(),
+            )
+        };
+        call
+    }
+
+    fn with_scx(scx: &'a SimpleCx<'ll>) -> Self {
+        // Create a fresh builder from the simple context.
+        let llbuilder = unsafe { llvm::LLVMCreateBuilderInContext(scx.llcx) };
+        SBuilder { llbuilder, cx: scx }
+    }
+}
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
+    pub(crate) fn bitcast(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        unsafe { llvm::LLVMBuildBitCast(self.llbuilder, val, dest_ty, UNNAMED) }
+    }
+
+    fn ret_void(&mut self) {
+        unsafe {
+            llvm::LLVMBuildRetVoid(self.llbuilder);
+        }
+    }
+
+    fn ret(&mut self, v: &'ll Value) {
+        unsafe {
+            llvm::LLVMBuildRet(self.llbuilder, v);
+        }
+    }
+}
+impl<'a, 'll> SBuilder<'a, 'll> {
+    fn build(cx: &'a SimpleCx<'ll>, llbb: &'ll BasicBlock) -> SBuilder<'a, 'll> {
+        let bx = SBuilder::with_scx(cx);
+        unsafe {
+            llvm::LLVMPositionBuilderAtEnd(bx.llbuilder, llbb);
+        }
+        bx
+    }
+
+    fn check_call<'b>(
+        &mut self,
+        typ: &str,
+        fn_ty: &'ll Type,
+        llfn: &'ll Value,
+        args: &'b [&'ll Value],
+    ) -> Cow<'b, [&'ll Value]> {
+        assert!(
+            self.cx.type_kind(fn_ty) == TypeKind::Function,
+            "builder::{typ} not passed a function, but {fn_ty:?}"
+        );
+
+        let param_tys = self.cx.func_params_types(fn_ty);
+
+        let all_args_match = iter::zip(&param_tys, args.iter().map(|&v| self.cx.val_ty(v)))
+            .all(|(expected_ty, actual_ty)| *expected_ty == actual_ty);
+
+        if all_args_match {
+            return Cow::Borrowed(args);
+        }
+
+        let casted_args: Vec<_> = iter::zip(param_tys, args)
+            .enumerate()
+            .map(|(i, (expected_ty, &actual_val))| {
+                let actual_ty = self.cx.val_ty(actual_val);
+                if expected_ty != actual_ty {
+                    debug!(
+                        "type mismatch in function call of {:?}. \
+                            Expected {:?} for param {}, got {:?}; injecting bitcast",
+                        llfn, expected_ty, i, actual_ty
+                    );
+                    self.bitcast(actual_val, expected_ty)
+                } else {
+                    actual_val
+                }
+            })
+            .collect();
+
+        Cow::Owned(casted_args)
+    }
+}
+
 /// Empty string, to be used where LLVM expects an instruction name, indicating
 /// that the instruction is to be left unnamed (i.e. numbered, in textual IR).
 // FIXME(eddyb) pass `&CStr` directly to FFI once it's a thin pointer.
@@ -313,6 +421,20 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unchecked_umul(x, y) => LLVMBuildNUWMul,
     }
 
+    fn or_disjoint(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
+        unsafe {
+            let or = llvm::LLVMBuildOr(self.llbuilder, a, b, UNNAMED);
+
+            // If a and b are both values, then `or` is a value, rather than
+            // an instruction, so we need to check before setting the flag.
+            // (See also `LLVMBuildNUWNeg` which also needs a check.)
+            if llvm::LLVMIsAInstruction(or).is_some() {
+                llvm::LLVMSetIsDisjoint(or, True);
+            }
+            or
+        }
+    }
+
     set_math_builder_methods! {
         fadd_fast(x, y) => (LLVMBuildFAdd, LLVMRustSetFastMath),
         fsub_fast(x, y) => (LLVMBuildFSub, LLVMRustSetFastMath),
@@ -612,14 +734,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn range_metadata(&mut self, load: &'ll Value, range: WrappingRange) {
-        if self.sess().target.arch == "amdgpu" {
-            // amdgpu/LLVM does something weird and thinks an i64 value is
-            // split into a v2i32, halving the bitwidth LLVM expects,
-            // tripping an assertion. So, for now, just disable this
-            // optimization.
-            return;
-        }
-
         if self.cx.sess().opts.optimize == OptLevel::No {
             // Don't emit metadata we're not going to use
             return;
@@ -1227,11 +1341,21 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 impl<'ll> StaticBuilderMethods for Builder<'_, 'll, '_> {
     fn get_static(&mut self, def_id: DefId) -> &'ll Value {
         // Forward to the `get_static` method of `CodegenCx`
-        self.cx().get_static(def_id)
+        let s = self.cx().get_static(def_id);
+        // Cast to default address space if globals are in a different addrspace
+        self.cx().const_pointercast(s, self.type_ptr())
     }
 }
 
 impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
+    fn build(cx: &'a CodegenCx<'ll, 'tcx>, llbb: &'ll BasicBlock) -> Builder<'a, 'll, 'tcx> {
+        let bx = Builder::with_cx(cx);
+        unsafe {
+            llvm::LLVMPositionBuilderAtEnd(bx.llbuilder, llbb);
+        }
+        bx
+    }
+
     fn with_cx(cx: &'a CodegenCx<'ll, 'tcx>) -> Self {
         // Create a fresh builder from the crate context.
         let llbuilder = unsafe { llvm::LLVMCreateBuilderInContext(cx.llcx) };
@@ -1241,13 +1365,16 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     pub(crate) fn llfn(&self) -> &'ll Value {
         unsafe { llvm::LLVMGetBasicBlockParent(self.llbb()) }
     }
+}
 
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     fn position_at_start(&mut self, llbb: &'ll BasicBlock) {
         unsafe {
             llvm::LLVMRustPositionBuilderAtStart(self.llbuilder, llbb);
         }
     }
-
+}
+impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     fn align_metadata(&mut self, load: &'ll Value, align: Align) {
         unsafe {
             let md = [llvm::LLVMValueAsMetadata(self.cx.const_u64(align.bytes()))];
@@ -1269,7 +1396,8 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             self.set_metadata(inst, llvm::MD_unpredictable, md);
         }
     }
-
+}
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     pub(crate) fn minnum(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
         unsafe { llvm::LLVMRustBuildMinNum(self.llbuilder, lhs, rhs) }
     }
@@ -1370,7 +1498,9 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         let ret = unsafe { llvm::LLVMBuildCatchRet(self.llbuilder, funclet.cleanuppad(), unwind) };
         ret.expect("LLVM does not have support for catchret")
     }
+}
 
+impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     fn check_call<'b>(
         &mut self,
         typ: &str,
@@ -1411,11 +1541,13 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         Cow::Owned(casted_args)
     }
-
+}
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     pub(crate) fn va_arg(&mut self, list: &'ll Value, ty: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMBuildVAArg(self.llbuilder, list, ty, UNNAMED) }
     }
-
+}
+impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     pub(crate) fn call_intrinsic(&mut self, intrinsic: &str, args: &[&'ll Value]) -> &'ll Value {
         let (ty, f) = self.cx.get_intrinsic(intrinsic);
         self.call(ty, None, None, f, args, None, None)
@@ -1433,7 +1565,8 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         self.call_intrinsic(intrinsic, &[self.cx.const_u64(size), ptr]);
     }
-
+}
+impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     pub(crate) fn phi(
         &mut self,
         ty: &'ll Type,
@@ -1453,7 +1586,8 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             llvm::LLVMAddIncoming(phi, &val, &bb, 1 as c_uint);
         }
     }
-
+}
+impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     fn fptoint_sat(&mut self, signed: bool, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         let src_ty = self.cx.val_ty(val);
         let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index 38f7eaa090f..b2c1088e3fc 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -3,20 +3,18 @@ use std::ptr;
 use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode};
 use rustc_codegen_ssa::ModuleCodegen;
 use rustc_codegen_ssa::back::write::ModuleConfig;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
 use rustc_errors::FatalError;
-use rustc_middle::ty::TyCtxt;
-use rustc_session::config::Lto;
 use tracing::{debug, trace};
 
-use crate::back::write::{llvm_err, llvm_optimize};
-use crate::builder::Builder;
-use crate::declare::declare_raw_fn;
+use crate::back::write::llvm_err;
+use crate::builder::SBuilder;
+use crate::context::SimpleCx;
+use crate::declare::declare_simple_fn;
 use crate::errors::LlvmError;
 use crate::llvm::AttributePlace::Function;
 use crate::llvm::{Metadata, True};
 use crate::value::Value;
-use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, context, llvm};
+use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, llvm};
 
 fn get_params(fnc: &Value) -> Vec<&Value> {
     unsafe {
@@ -38,8 +36,8 @@ fn get_params(fnc: &Value) -> Vec<&Value> {
 /// [^1]: <https://enzyme.mit.edu/getting_started/CallingConvention/>
 // FIXME(ZuseZ4): `outer_fn` should include upstream safety checks to
 // cover some assumptions of enzyme/autodiff, which could lead to UB otherwise.
-fn generate_enzyme_call<'ll, 'tcx>(
-    cx: &context::CodegenCx<'ll, 'tcx>,
+fn generate_enzyme_call<'ll>(
+    cx: &SimpleCx<'ll>,
     fn_to_diff: &'ll Value,
     outer_fn: &'ll Value,
     attrs: AutoDiffAttrs,
@@ -54,8 +52,6 @@ fn generate_enzyme_call<'ll, 'tcx>(
     let mut ad_name: String = match attrs.mode {
         DiffMode::Forward => "__enzyme_fwddiff",
         DiffMode::Reverse => "__enzyme_autodiff",
-        DiffMode::ForwardFirst => "__enzyme_fwddiff",
-        DiffMode::ReverseFirst => "__enzyme_autodiff",
         _ => panic!("logic bug in autodiff, unrecognized mode"),
     }
     .to_string();
@@ -63,8 +59,8 @@ fn generate_enzyme_call<'ll, 'tcx>(
     // add outer_fn name to ad_name to make it unique, in case users apply autodiff to multiple
     // functions. Unwrap will only panic, if LLVM gave us an invalid string.
     let name = llvm::get_value_name(outer_fn);
-    let outer_fn_name = std::ffi::CStr::from_bytes_with_nul(name).unwrap().to_str().unwrap();
-    ad_name.push_str(outer_fn_name.to_string().as_str());
+    let outer_fn_name = std::str::from_utf8(name).unwrap();
+    ad_name.push_str(outer_fn_name);
 
     // Let us assume the user wrote the following function square:
     //
@@ -112,7 +108,7 @@ fn generate_enzyme_call<'ll, 'tcx>(
         //FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
         // think a bit more about what should go here.
         let cc = llvm::LLVMGetFunctionCallConv(outer_fn);
-        let ad_fn = declare_raw_fn(
+        let ad_fn = declare_simple_fn(
             cx,
             &ad_name,
             llvm::CallConv::try_from(cc).expect("invalid callconv"),
@@ -132,7 +128,7 @@ fn generate_enzyme_call<'ll, 'tcx>(
         llvm::LLVMRustEraseInstFromParent(br);
 
         let last_inst = llvm::LLVMRustGetLastInstruction(entry).unwrap();
-        let mut builder = Builder::build(cx, entry);
+        let mut builder = SBuilder::build(cx, entry);
 
         let num_args = llvm::LLVMCountParams(&fn_to_diff);
         let mut args = Vec::with_capacity(num_args as usize + 1);
@@ -154,7 +150,7 @@ fn generate_enzyme_call<'ll, 'tcx>(
             _ => {}
         }
 
-        trace!("matching autodiff arguments");
+        debug!("matching autodiff arguments");
         // We now handle the issue that Rust level arguments not always match the llvm-ir level
         // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
         // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
@@ -165,10 +161,10 @@ fn generate_enzyme_call<'ll, 'tcx>(
         let mut activity_pos = 0;
         let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
         while activity_pos < inputs.len() {
-            let activity = inputs[activity_pos as usize];
+            let diff_activity = inputs[activity_pos as usize];
             // Duplicated arguments received a shadow argument, into which enzyme will write the
             // gradient.
-            let (activity, duplicated): (&Metadata, bool) = match activity {
+            let (activity, duplicated): (&Metadata, bool) = match diff_activity {
                 DiffActivity::None => panic!("not a valid input activity"),
                 DiffActivity::Const => (enzyme_const, false),
                 DiffActivity::Active => (enzyme_out, false),
@@ -223,7 +219,15 @@ fn generate_enzyme_call<'ll, 'tcx>(
                     // A duplicated pointer will have the following two outer_fn arguments:
                     // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
                     // (..., metadata! enzyme_dup, ptr, ptr, ...).
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer);
+                    if matches!(
+                        diff_activity,
+                        DiffActivity::Duplicated | DiffActivity::DuplicatedOnly
+                    ) {
+                        assert!(
+                            llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer
+                        );
+                    }
+                    // In the case of Dual we don't have assumptions, e.g. f32 would be valid.
                     args.push(next_outer_arg);
                     outer_pos += 2;
                     activity_pos += 1;
@@ -236,7 +240,7 @@ fn generate_enzyme_call<'ll, 'tcx>(
             }
         }
 
-        let call = builder.call(enzyme_ty, None, None, ad_fn, &args, None, None);
+        let call = builder.call(enzyme_ty, ad_fn, &args, None);
 
         // This part is a bit iffy. LLVM requires that a call to an inlineable function has some
         // metadata attachted to it, but we just created this code oota. Given that the
@@ -256,14 +260,14 @@ fn generate_enzyme_call<'ll, 'tcx>(
             // have no debug info to copy, which would then be ok.
             trace!("no dbg info");
         }
+
         // Now that we copied the metadata, get rid of dummy code.
-        llvm::LLVMRustEraseInstBefore(entry, last_inst);
-        llvm::LLVMRustEraseInstFromParent(last_inst);
+        llvm::LLVMRustEraseInstUntilInclusive(entry, last_inst);
 
-        if cx.val_ty(outer_fn) != cx.type_void() {
-            builder.ret(call);
-        } else {
+        if cx.val_ty(call) == cx.type_void() {
             builder.ret_void();
+        } else {
+            builder.ret(call);
         }
 
         // Let's crash in case that we messed something up above and generated invalid IR.
@@ -274,40 +278,44 @@ fn generate_enzyme_call<'ll, 'tcx>(
     }
 }
 
-pub(crate) fn differentiate<'ll, 'tcx>(
+pub(crate) fn differentiate<'ll>(
     module: &'ll ModuleCodegen<ModuleLlvm>,
     cgcx: &CodegenContext<LlvmCodegenBackend>,
-    tcx: TyCtxt<'tcx>,
     diff_items: Vec<AutoDiffItem>,
-    config: &ModuleConfig,
+    _config: &ModuleConfig,
 ) -> Result<(), FatalError> {
     for item in &diff_items {
         trace!("{}", item);
     }
 
     let diag_handler = cgcx.create_dcx();
-    let (_, cgus) = tcx.collect_and_partition_mono_items(());
-    let cx = context::CodegenCx::new(tcx, &cgus.first().unwrap(), &module.module_llvm);
+    let cx = SimpleCx { llmod: module.module_llvm.llmod(), llcx: module.module_llvm.llcx };
 
     // Before dumping the module, we want all the TypeTrees to become part of the module.
     for item in diff_items.iter() {
         let name = item.source.clone();
         let fn_def: Option<&llvm::Value> = cx.get_function(&name);
         let Some(fn_def) = fn_def else {
-            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
-                src: item.source.clone(),
-                target: item.target.clone(),
-                error: "could not find source function".to_owned(),
-            }));
+            return Err(llvm_err(
+                diag_handler.handle(),
+                LlvmError::PrepareAutoDiff {
+                    src: item.source.clone(),
+                    target: item.target.clone(),
+                    error: "could not find source function".to_owned(),
+                },
+            ));
         };
         debug!(?item.target);
         let fn_target: Option<&llvm::Value> = cx.get_function(&item.target);
         let Some(fn_target) = fn_target else {
-            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
-                src: item.source.clone(),
-                target: item.target.clone(),
-                error: "could not find target function".to_owned(),
-            }));
+            return Err(llvm_err(
+                diag_handler.handle(),
+                LlvmError::PrepareAutoDiff {
+                    src: item.source.clone(),
+                    target: item.target.clone(),
+                    error: "could not find target function".to_owned(),
+                },
+            ));
         };
 
         generate_enzyme_call(&cx, fn_def, fn_target, item.attrs.clone());
@@ -315,29 +323,6 @@ pub(crate) fn differentiate<'ll, 'tcx>(
 
     // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
 
-    if let Some(opt_level) = config.opt_level {
-        let opt_stage = match cgcx.lto {
-            Lto::Fat => llvm::OptStage::PreLinkFatLTO,
-            Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
-            _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
-            _ => llvm::OptStage::PreLinkNoLTO,
-        };
-        // This is our second opt call, so now we run all opts,
-        // to make sure we get the best performance.
-        let skip_size_increasing_opts = false;
-        trace!("running Module Optimization after differentiation");
-        unsafe {
-            llvm_optimize(
-                cgcx,
-                diag_handler.handle(),
-                module,
-                config,
-                opt_level,
-                opt_stage,
-                skip_size_increasing_opts,
-            )?
-        };
-    }
     trace!("done with differentiate()");
 
     Ok(())
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index adfe8aeb5c5..78b3a7f8541 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -126,6 +126,10 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         unsafe { llvm::LLVMGetUndef(t) }
     }
 
+    fn is_undef(&self, v: &'ll Value) -> bool {
+        unsafe { llvm::LLVMIsUndef(v) == True }
+    }
+
     fn const_poison(&self, t: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMGetPoison(t) }
     }
@@ -215,12 +219,14 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 let g = self.define_global(&sym, self.val_ty(sc)).unwrap_or_else(|| {
                     bug!("symbol `{}` is already defined", sym);
                 });
+                llvm::set_initializer(g, sc);
                 unsafe {
-                    llvm::LLVMSetInitializer(g, sc);
                     llvm::LLVMSetGlobalConstant(g, True);
                     llvm::LLVMSetUnnamedAddress(g, llvm::UnnamedAddr::Global);
                 }
                 llvm::set_linkage(g, llvm::Linkage::InternalLinkage);
+                // Cast to default address space if globals are in a different addrspace
+                let g = self.const_pointercast(g, self.type_ptr());
                 (s.to_owned(), g)
             })
             .1;
@@ -285,7 +291,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                             let alloc = alloc.inner();
                             let value = match alloc.mutability {
                                 Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None),
-                                _ => self.static_addr_of(init, alloc.align, None),
+                                _ => self.static_addr_of_impl(init, alloc.align, None),
                             };
                             if !self.sess().fewer_names() && llvm::get_value_name(value).is_empty()
                             {
@@ -308,10 +314,15 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                     GlobalAlloc::VTable(ty, dyn_ty) => {
                         let alloc = self
                             .tcx
-                            .global_alloc(self.tcx.vtable_allocation((ty, dyn_ty.principal())))
+                            .global_alloc(self.tcx.vtable_allocation((
+                                ty,
+                                dyn_ty.principal().map(|principal| {
+                                    self.tcx.instantiate_bound_regions_with_erased(principal)
+                                }),
+                            )))
                             .unwrap_memory();
                         let init = const_alloc_to_llvm(self, alloc, /*static*/ false);
-                        let value = self.static_addr_of(init, alloc.inner().align, None);
+                        let value = self.static_addr_of_impl(init, alloc.inner().align, None);
                         (value, AddressSpace::DATA)
                     }
                     GlobalAlloc::Static(def_id) => {
@@ -323,7 +334,8 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 let llval = unsafe {
                     llvm::LLVMConstInBoundsGEP2(
                         self.type_i8(),
-                        self.const_bitcast(base_addr, self.type_ptr_ext(base_addr_space)),
+                        // Cast to the required address space if necessary
+                        self.const_pointercast(base_addr, self.type_ptr_ext(base_addr_space)),
                         &self.const_usize(offset.bytes()),
                         1,
                     )
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index c7114480d8b..4a5491ec7a1 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -191,7 +191,7 @@ fn check_and_apply_linkage<'ll, 'tcx>(
             })
         });
         llvm::set_linkage(g2, llvm::Linkage::InternalLinkage);
-        unsafe { llvm::LLVMSetInitializer(g2, g1) };
+        llvm::set_initializer(g2, g1);
         g2
     } else if cx.tcx.sess.target.arch == "x86"
         && common::is_mingw_gnu_toolchain(&cx.tcx.sess.target)
@@ -210,6 +210,14 @@ impl<'ll> CodegenCx<'ll, '_> {
         unsafe { llvm::LLVMConstBitCast(val, ty) }
     }
 
+    pub(crate) fn const_pointercast(&self, val: &'ll Value, ty: &'ll Type) -> &'ll Value {
+        unsafe { llvm::LLVMConstPointerCast(val, ty) }
+    }
+
+    /// Create a global variable.
+    ///
+    /// The returned global variable is a pointer in the default address space for globals.
+    /// Fails if a symbol with the given name already exists.
     pub(crate) fn static_addr_of_mut(
         &self,
         cv: &'ll Value,
@@ -227,12 +235,40 @@ impl<'ll> CodegenCx<'ll, '_> {
             }
             _ => self.define_private_global(self.val_ty(cv)),
         };
-        unsafe { llvm::LLVMSetInitializer(gv, cv) };
+        llvm::set_initializer(gv, cv);
         set_global_alignment(self, gv, align);
         llvm::SetUnnamedAddress(gv, llvm::UnnamedAddr::Global);
         gv
     }
 
+    /// Create a global constant.
+    ///
+    /// The returned global variable is a pointer in the default address space for globals.
+    pub(crate) fn static_addr_of_impl(
+        &self,
+        cv: &'ll Value,
+        align: Align,
+        kind: Option<&str>,
+    ) -> &'ll Value {
+        if let Some(&gv) = self.const_globals.borrow().get(&cv) {
+            unsafe {
+                // Upgrade the alignment in cases where the same constant is used with different
+                // alignment requirements
+                let llalign = align.bytes() as u32;
+                if llalign > llvm::LLVMGetAlignment(gv) {
+                    llvm::LLVMSetAlignment(gv, llalign);
+                }
+            }
+            return gv;
+        }
+        let gv = self.static_addr_of_mut(cv, align, kind);
+        unsafe {
+            llvm::LLVMSetGlobalConstant(gv, True);
+        }
+        self.const_globals.borrow_mut().insert(cv, gv);
+        gv
+    }
+
     #[instrument(level = "debug", skip(self))]
     pub(crate) fn get_static(&self, def_id: DefId) -> &'ll Value {
         let instance = Instance::mono(self.tcx, def_id);
@@ -384,8 +420,14 @@ impl<'ll> CodegenCx<'ll, '_> {
             let g = if val_llty == llty {
                 g
             } else {
-                // If we created the global with the wrong type,
-                // correct the type.
+                // codegen_static_initializer creates the global value just from the
+                // `Allocation` data by generating one big struct value that is just
+                // all the bytes and pointers after each other. This will almost never
+                // match the type that the static was declared with. Unfortunately
+                // we can't just LLVMConstBitCast our way out of it because that has very
+                // specific rules on what can be cast. So instead of adding a new way to
+                // generate static initializers that match the static's type, we picked
+                // the easier option and retroactively change the type of the static item itself.
                 let name = llvm::get_value_name(g).to_vec();
                 llvm::set_value_name(g, b"");
 
@@ -416,7 +458,7 @@ impl<'ll> CodegenCx<'ll, '_> {
                 new_g
             };
             set_global_alignment(self, g, alloc.align);
-            llvm::LLVMSetInitializer(g, v);
+            llvm::set_initializer(g, v);
 
             if self.should_assume_dso_local(g, true) {
                 llvm::LLVMRustSetDSOLocal(g, true);
@@ -505,24 +547,15 @@ impl<'ll> CodegenCx<'ll, '_> {
 }
 
 impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> {
+    /// Get a pointer to a global variable.
+    ///
+    /// The pointer will always be in the default address space. If global variables default to a
+    /// different address space, an addrspacecast is inserted.
     fn static_addr_of(&self, cv: &'ll Value, align: Align, kind: Option<&str>) -> &'ll Value {
-        if let Some(&gv) = self.const_globals.borrow().get(&cv) {
-            unsafe {
-                // Upgrade the alignment in cases where the same constant is used with different
-                // alignment requirements
-                let llalign = align.bytes() as u32;
-                if llalign > llvm::LLVMGetAlignment(gv) {
-                    llvm::LLVMSetAlignment(gv, llalign);
-                }
-            }
-            return gv;
-        }
-        let gv = self.static_addr_of_mut(cv, align, kind);
-        unsafe {
-            llvm::LLVMSetGlobalConstant(gv, True);
-        }
-        self.const_globals.borrow_mut().insert(cv, gv);
-        gv
+        let gv = self.static_addr_of_impl(cv, align, kind);
+        // static_addr_of_impl returns the bare global variable, which might not be in the default
+        // address space. Cast to the default address space if necessary.
+        self.const_pointercast(gv, self.type_ptr())
     }
 
     fn codegen_static(&self, def_id: DefId) {
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index d8fbe51b975..7fe527a4c07 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1,11 +1,13 @@
 use std::borrow::Borrow;
 use std::cell::{Cell, RefCell};
 use std::ffi::{CStr, c_char, c_uint};
+use std::ops::Deref;
 use std::str;
 
 use rustc_abi::{HasDataLayout, TargetDataLayout, VariantIdx};
 use rustc_codegen_ssa::back::versioned_llvm_target;
 use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
+use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::errors as ssa_errors;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::base_n::{ALPHANUMERIC_ONLY, ToBaseN};
@@ -30,30 +32,52 @@ use smallvec::SmallVec;
 
 use crate::back::write::to_llvm_code_model;
 use crate::callee::get_fn;
-use crate::common::AsCCharPtr;
+use crate::common::{self, AsCCharPtr};
 use crate::debuginfo::metadata::apply_vcall_visibility_metadata;
 use crate::llvm::{Metadata, MetadataType};
 use crate::type_::Type;
 use crate::value::Value;
 use crate::{attributes, coverageinfo, debuginfo, llvm, llvm_util};
 
+/// `TyCtxt` (and related cache datastructures) can't be move between threads.
+/// However, there are various cx related functions which we want to be available to the builder and
+/// other compiler pieces. Here we define a small subset which has enough information and can be
+/// moved around more freely.
+pub(crate) struct SimpleCx<'ll> {
+    pub llmod: &'ll llvm::Module,
+    pub llcx: &'ll llvm::Context,
+}
+
+impl<'ll> Borrow<SimpleCx<'ll>> for CodegenCx<'ll, '_> {
+    fn borrow(&self) -> &SimpleCx<'ll> {
+        &self.scx
+    }
+}
+
+impl<'ll, 'tcx> Deref for CodegenCx<'ll, 'tcx> {
+    type Target = SimpleCx<'ll>;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.scx
+    }
+}
+
 /// There is one `CodegenCx` per codegen unit. Each one has its own LLVM
 /// `llvm::Context` so that several codegen units may be processed in parallel.
 /// All other LLVM data structures in the `CodegenCx` are tied to that `llvm::Context`.
 pub(crate) struct CodegenCx<'ll, 'tcx> {
     pub tcx: TyCtxt<'tcx>,
+    pub scx: SimpleCx<'ll>,
     pub use_dll_storage_attrs: bool,
     pub tls_model: llvm::ThreadLocalMode,
 
-    pub llmod: &'ll llvm::Module,
-    pub llcx: &'ll llvm::Context,
     pub codegen_unit: &'tcx CodegenUnit<'tcx>,
 
     /// Cache instances of monomorphic and polymorphic items
     pub instances: RefCell<FxHashMap<Instance<'tcx>, &'ll Value>>,
     /// Cache generated vtables
-    pub vtables:
-        RefCell<FxHashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), &'ll Value>>,
+    pub vtables: RefCell<FxHashMap<(Ty<'tcx>, Option<ty::ExistentialTraitRef<'tcx>>), &'ll Value>>,
     /// Cache of constant strings,
     pub const_str_cache: RefCell<FxHashMap<String, &'ll Value>>,
 
@@ -553,10 +577,9 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
 
         CodegenCx {
             tcx,
+            scx: SimpleCx { llcx, llmod },
             use_dll_storage_attrs,
             tls_model,
-            llmod,
-            llcx,
             codegen_unit,
             instances: Default::default(),
             vtables: Default::default(),
@@ -593,12 +616,15 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     pub(crate) fn create_used_variable_impl(&self, name: &'static CStr, values: &[&'ll Value]) {
         let array = self.const_array(self.type_ptr(), values);
 
-        unsafe {
-            let g = llvm::LLVMAddGlobal(self.llmod, self.val_ty(array), name.as_ptr());
-            llvm::LLVMSetInitializer(g, array);
-            llvm::set_linkage(g, llvm::Linkage::AppendingLinkage);
-            llvm::set_section(g, c"llvm.metadata");
-        }
+        let g = llvm::add_global(self.llmod, self.val_ty(array), name);
+        llvm::set_initializer(g, array);
+        llvm::set_linkage(g, llvm::Linkage::AppendingLinkage);
+        llvm::set_section(g, c"llvm.metadata");
+    }
+}
+impl<'ll> SimpleCx<'ll> {
+    pub(crate) fn val_ty(&self, v: &'ll Value) -> &'ll Type {
+        common::val_ty(v)
     }
 
     pub(crate) fn get_metadata_value(&self, metadata: &'ll Metadata) -> &'ll Value {
@@ -625,20 +651,23 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             llvm::LLVMMDStringInContext2(self.llcx, name.as_ptr() as *const c_char, name.len())
         })
     }
+
+    pub(crate) fn type_kind(&self, ty: &'ll Type) -> TypeKind {
+        unsafe { llvm::LLVMRustGetTypeKind(ty).to_generic() }
+    }
 }
 
 impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     fn vtables(
         &self,
-    ) -> &RefCell<FxHashMap<(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>), &'ll Value>>
-    {
+    ) -> &RefCell<FxHashMap<(Ty<'tcx>, Option<ty::ExistentialTraitRef<'tcx>>), &'ll Value>> {
         &self.vtables
     }
 
     fn apply_vcall_visibility_metadata(
         &self,
         ty: Ty<'tcx>,
-        poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+        poly_trait_ref: Option<ty::ExistentialTraitRef<'tcx>>,
         vtable: &'ll Value,
     ) {
         apply_vcall_visibility_metadata(self, ty, poly_trait_ref, vtable);
@@ -741,14 +770,16 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         if self.get_declared_value(entry_name).is_none() {
             Some(self.declare_entry_fn(
                 entry_name,
-                self.sess().target.entry_abi.into(),
+                llvm::CallConv::from_conv(
+                    self.sess().target.entry_abi,
+                    self.sess().target.arch.borrow(),
+                ),
                 llvm::UnnamedAddr::Global,
                 fn_type,
             ))
         } else {
             // If the symbol already exists, it is an error: for example, the user wrote
             // #[no_mangle] extern "C" fn main(..) {..}
-            // instead of #[start]
             None
         }
     }
@@ -1176,6 +1207,20 @@ impl CodegenCx<'_, '_> {
     }
 }
 
+// This is a duplication of the set_metadata function above. However, so far it's the only one
+// shared between both contexts, so it doesn't seem worth it to make the Cx generic like we did it
+// for the Builder.
+impl SimpleCx<'_> {
+    #[allow(unused)]
+    /// A wrapper for [`llvm::LLVMSetMetadata`], but it takes `Metadata` as a parameter instead of `Value`.
+    pub(crate) fn set_metadata<'a>(&self, val: &'a Value, kind_id: MetadataType, md: &'a Metadata) {
+        unsafe {
+            let node = llvm::LLVMMetadataAsValue(&self.llcx, md);
+            llvm::LLVMSetMetadata(val, kind_id as c_uint, node);
+        }
+    }
+}
+
 impl HasDataLayout for CodegenCx<'_, '_> {
     #[inline]
     fn data_layout(&self) -> &TargetDataLayout {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index b3ad2a0e409..9a2473d6cf2 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -9,6 +9,7 @@ use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_index::IndexVec;
 use rustc_middle::mir;
+use rustc_middle::mir::mono::MonoItemPartitions;
 use rustc_middle::ty::{self, TyCtxt};
 use rustc_session::RemapFileNameExt;
 use rustc_session::config::RemapPathScopeComponents;
@@ -297,12 +298,13 @@ struct UsageSets<'tcx> {
 /// Prepare sets of definitions that are relevant to deciding whether something
 /// is an "unused function" for coverage purposes.
 fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
-    let (all_mono_items, cgus) = tcx.collect_and_partition_mono_items(());
+    let MonoItemPartitions { all_mono_items, codegen_units, .. } =
+        tcx.collect_and_partition_mono_items(());
 
     // Obtain a MIR body for each function participating in codegen, via an
     // arbitrary instance.
     let mut def_ids_seen = FxHashSet::default();
-    let def_and_mir_for_all_mono_fns = cgus
+    let def_and_mir_for_all_mono_fns = codegen_units
         .iter()
         .flat_map(|cgu| cgu.items().keys())
         .filter_map(|item| match item {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 5428d776f41..c53ea6d4666 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -11,7 +11,8 @@ use rustc_codegen_ssa::traits::{
     BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
 };
 use rustc_middle::mir::coverage::{
-    CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping, MappingKind, Op,
+    BasicCoverageBlock, CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping,
+    MappingKind, Op,
 };
 use rustc_middle::ty::{Instance, TyCtxt};
 use rustc_span::Span;
@@ -51,9 +52,9 @@ pub(crate) fn prepare_covfun_record<'tcx>(
     is_used: bool,
 ) -> Option<CovfunRecord<'tcx>> {
     let fn_cov_info = tcx.instance_mir(instance.def).function_coverage_info.as_deref()?;
-    let ids_info = tcx.coverage_ids_info(instance.def);
+    let ids_info = tcx.coverage_ids_info(instance.def)?;
 
-    let expressions = prepare_expressions(fn_cov_info, ids_info, is_used);
+    let expressions = prepare_expressions(ids_info);
 
     let mut covfun = CovfunRecord {
         mangled_function_name: tcx.symbol_name(instance).name,
@@ -75,26 +76,14 @@ pub(crate) fn prepare_covfun_record<'tcx>(
 }
 
 /// Convert the function's coverage-counter expressions into a form suitable for FFI.
-fn prepare_expressions(
-    fn_cov_info: &FunctionCoverageInfo,
-    ids_info: &CoverageIdsInfo,
-    is_used: bool,
-) -> Vec<ffi::CounterExpression> {
-    // If any counters or expressions were removed by MIR opts, replace their
-    // terms with zero.
-    let counter_for_term = |term| {
-        if !is_used || ids_info.is_zero_term(term) {
-            ffi::Counter::ZERO
-        } else {
-            ffi::Counter::from_term(term)
-        }
-    };
+fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression> {
+    let counter_for_term = ffi::Counter::from_term;
 
     // We know that LLVM will optimize out any unused expressions before
     // producing the final coverage map, so there's no need to do the same
     // thing on the Rust side unless we're confident we can do much better.
     // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)
-    fn_cov_info
+    ids_info
         .expressions
         .iter()
         .map(move |&Expression { lhs, op, rhs }| ffi::CounterExpression {
@@ -136,11 +125,16 @@ fn fill_region_tables<'tcx>(
 
     // For each counter/region pair in this function+file, convert it to a
     // form suitable for FFI.
-    let is_zero_term = |term| !covfun.is_used || ids_info.is_zero_term(term);
     for &Mapping { ref kind, span } in &fn_cov_info.mappings {
-        // If the mapping refers to counters/expressions that were removed by
-        // MIR opts, replace those occurrences with zero.
-        let kind = kind.map_terms(|term| if is_zero_term(term) { CovTerm::Zero } else { term });
+        // If this function is unused, replace all counters with zero.
+        let counter_for_bcb = |bcb: BasicCoverageBlock| -> ffi::Counter {
+            let term = if covfun.is_used {
+                ids_info.term_for_bcb[bcb].expect("every BCB in a mapping was given a term")
+            } else {
+                CovTerm::Zero
+            };
+            ffi::Counter::from_term(term)
+        };
 
         // Convert the `Span` into coordinates that we can pass to LLVM, or
         // discard the span if conversion fails. In rare, cases _all_ of a
@@ -154,23 +148,22 @@ fn fill_region_tables<'tcx>(
             continue;
         }
 
-        match kind {
-            MappingKind::Code(term) => {
-                code_regions
-                    .push(ffi::CodeRegion { cov_span, counter: ffi::Counter::from_term(term) });
+        match *kind {
+            MappingKind::Code { bcb } => {
+                code_regions.push(ffi::CodeRegion { cov_span, counter: counter_for_bcb(bcb) });
             }
-            MappingKind::Branch { true_term, false_term } => {
+            MappingKind::Branch { true_bcb, false_bcb } => {
                 branch_regions.push(ffi::BranchRegion {
                     cov_span,
-                    true_counter: ffi::Counter::from_term(true_term),
-                    false_counter: ffi::Counter::from_term(false_term),
+                    true_counter: counter_for_bcb(true_bcb),
+                    false_counter: counter_for_bcb(false_bcb),
                 });
             }
-            MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => {
+            MappingKind::MCDCBranch { true_bcb, false_bcb, mcdc_params } => {
                 mcdc_branch_regions.push(ffi::MCDCBranchRegion {
                     cov_span,
-                    true_counter: ffi::Counter::from_term(true_term),
-                    false_counter: ffi::Counter::from_term(false_term),
+                    true_counter: counter_for_bcb(true_bcb),
+                    false_counter: counter_for_bcb(false_bcb),
                     mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params),
                 });
             }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
index 7311cd9d230..ea7f581a3cb 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
@@ -8,7 +8,6 @@ use rustc_codegen_ssa::traits::{
 use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
 use rustc_middle::mir::coverage::CoverageKind;
 use rustc_middle::ty::Instance;
-use rustc_middle::ty::layout::HasTyCtxt;
 use tracing::{debug, instrument};
 
 use crate::builder::Builder;
@@ -147,6 +146,10 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
             debug!("function has a coverage statement but no coverage info");
             return;
         };
+        let Some(ids_info) = bx.tcx.coverage_ids_info(instance.def) else {
+            debug!("function has a coverage statement but no IDs info");
+            return;
+        };
 
         // Mark the instance as used in this CGU, for coverage purposes.
         // This includes functions that were not partitioned into this CGU,
@@ -157,22 +160,12 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
             CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!(
                 "marker statement {kind:?} should have been removed by CleanupPostBorrowck"
             ),
-            CoverageKind::CounterIncrement { id } => {
-                // The number of counters passed to `llvm.instrprof.increment` might
-                // be smaller than the number originally inserted by the instrumentor,
-                // if some high-numbered counters were removed by MIR optimizations.
-                // If so, LLVM's profiler runtime will use fewer physical counters.
-                let num_counters =
-                    bx.tcx().coverage_ids_info(instance.def).num_counters_after_mir_opts();
-                assert!(
-                    num_counters as usize <= function_coverage_info.num_counters,
-                    "num_counters disagreement: query says {num_counters} but function info only has {}",
-                    function_coverage_info.num_counters
-                );
-
+            CoverageKind::VirtualCounter { bcb }
+                if let Some(&id) = ids_info.phys_counter_for_node.get(&bcb) =>
+            {
                 let fn_name = bx.get_pgo_func_name_var(instance);
                 let hash = bx.const_u64(function_coverage_info.function_source_hash);
-                let num_counters = bx.const_u32(num_counters);
+                let num_counters = bx.const_u32(ids_info.num_counters);
                 let index = bx.const_u32(id.as_u32());
                 debug!(
                     "codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, index={:?})",
@@ -180,10 +173,8 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
                 );
                 bx.instrprof_increment(fn_name, hash, num_counters, index);
             }
-            CoverageKind::ExpressionUsed { id: _ } => {
-                // Expression-used statements are markers that are handled by
-                // `coverage_ids_info`, so there's nothing to codegen here.
-            }
+            // If a BCB doesn't have an associated physical counter, there's nothing to codegen.
+            CoverageKind::VirtualCounter { .. } => {}
             CoverageKind::CondBitmapUpdate { index, decision_depth } => {
                 let cond_bitmap = coverage_cx
                     .try_get_mcdc_condition_bitmap(&instance, decision_depth)
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
index 07bd0f4d1c1..f52991b3697 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
@@ -4,12 +4,12 @@ use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext};
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_index::Idx;
-use rustc_index::bit_set::BitSet;
+use rustc_index::bit_set::DenseBitSet;
 use rustc_middle::mir::{Body, SourceScope};
 use rustc_middle::ty::layout::{FnAbiOf, HasTypingEnv};
 use rustc_middle::ty::{self, Instance};
 use rustc_session::config::DebugInfo;
-use rustc_span::{BytePos, hygiene};
+use rustc_span::{BytePos, DUMMY_SP, hygiene};
 
 use super::metadata::file_metadata;
 use super::utils::DIB;
@@ -27,7 +27,7 @@ pub(crate) fn compute_mir_scopes<'ll, 'tcx>(
 ) {
     // Find all scopes with variables defined in them.
     let variables = if cx.sess().opts.debuginfo == DebugInfo::Full {
-        let mut vars = BitSet::new_empty(mir.source_scopes.len());
+        let mut vars = DenseBitSet::new_empty(mir.source_scopes.len());
         // FIXME(eddyb) take into account that arguments always have debuginfo,
         // irrespective of their name (assuming full debuginfo is enabled).
         // NOTE(eddyb) actually, on second thought, those are always in the
@@ -40,7 +40,7 @@ pub(crate) fn compute_mir_scopes<'ll, 'tcx>(
         // Nothing to emit, of course.
         None
     };
-    let mut instantiated = BitSet::new_empty(mir.source_scopes.len());
+    let mut instantiated = DenseBitSet::new_empty(mir.source_scopes.len());
     let mut discriminators = FxHashMap::default();
     // Instantiate all scopes.
     for idx in 0..mir.source_scopes.len() {
@@ -63,9 +63,9 @@ fn make_mir_scope<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     instance: Instance<'tcx>,
     mir: &Body<'tcx>,
-    variables: &Option<BitSet<SourceScope>>,
+    variables: &Option<DenseBitSet<SourceScope>>,
     debug_context: &mut FunctionDebugContext<'tcx, &'ll DIScope, &'ll DILocation>,
-    instantiated: &mut BitSet<SourceScope>,
+    instantiated: &mut DenseBitSet<SourceScope>,
     discriminators: &mut FxHashMap<BytePos, u32>,
     scope: SourceScope,
 ) {
@@ -85,23 +85,15 @@ fn make_mir_scope<'ll, 'tcx>(
             discriminators,
             parent,
         );
-        if let Some(parent_scope) = debug_context.scopes[parent] {
-            parent_scope
-        } else {
-            // If the parent scope could not be represented then no children
-            // can be either.
-            debug_context.scopes[scope] = None;
-            instantiated.insert(scope);
-            return;
-        }
+        debug_context.scopes[parent]
     } else {
         // The root is the function itself.
         let file = cx.sess().source_map().lookup_source_file(mir.span.lo());
-        debug_context.scopes[scope] = Some(DebugScope {
+        debug_context.scopes[scope] = DebugScope {
             file_start_pos: file.start_pos,
             file_end_pos: file.end_position(),
-            ..debug_context.scopes[scope].unwrap()
-        });
+            ..debug_context.scopes[scope]
+        };
         instantiated.insert(scope);
         return;
     };
@@ -112,7 +104,7 @@ fn make_mir_scope<'ll, 'tcx>(
     {
         // Do not create a DIScope if there are no variables defined in this
         // MIR `SourceScope`, and it's not `inlined`, to avoid debuginfo bloat.
-        debug_context.scopes[scope] = Some(parent_scope);
+        debug_context.scopes[scope] = parent_scope;
         instantiated.insert(scope);
         return;
     }
@@ -135,7 +127,7 @@ fn make_mir_scope<'ll, 'tcx>(
             })
         }
         None => unsafe {
-            llvm::LLVMRustDIBuilderCreateLexicalBlock(
+            llvm::LLVMDIBuilderCreateLexicalBlock(
                 DIB(cx),
                 parent_scope.dbg_scope,
                 file_metadata,
@@ -145,14 +137,7 @@ fn make_mir_scope<'ll, 'tcx>(
         },
     };
 
-    let mut debug_scope = Some(DebugScope {
-        dbg_scope,
-        inlined_at: parent_scope.inlined_at,
-        file_start_pos: loc.file.start_pos,
-        file_end_pos: loc.file.end_position(),
-    });
-
-    if let Some((_, callsite_span)) = scope_data.inlined {
+    let inlined_at = scope_data.inlined.map(|(_, callsite_span)| {
         let callsite_span = hygiene::walk_chain_collapsed(callsite_span, mir.span);
         let callsite_scope = parent_scope.adjust_dbg_scope_for_span(cx, callsite_span);
         let loc = cx.dbg_loc(callsite_scope, parent_scope.inlined_at, callsite_span);
@@ -175,29 +160,29 @@ fn make_mir_scope<'ll, 'tcx>(
         // Note further that we can't key this hashtable on the span itself,
         // because these spans could have distinct SyntaxContexts. We have
         // to key on exactly what we're giving to LLVM.
-        let inlined_at = match discriminators.entry(callsite_span.lo()) {
+        match discriminators.entry(callsite_span.lo()) {
             Entry::Occupied(mut o) => {
                 *o.get_mut() += 1;
+                // NB: We have to emit *something* here or we'll fail LLVM IR verification
+                // in at least some circumstances (see issue #135322) so if the required
+                // discriminant cannot be encoded fall back to the dummy location.
                 unsafe { llvm::LLVMRustDILocationCloneWithBaseDiscriminator(loc, *o.get()) }
+                    .unwrap_or_else(|| {
+                        cx.dbg_loc(callsite_scope, parent_scope.inlined_at, DUMMY_SP)
+                    })
             }
             Entry::Vacant(v) => {
                 v.insert(0);
-                Some(loc)
-            }
-        };
-        match inlined_at {
-            Some(inlined_at) => {
-                debug_scope.as_mut().unwrap().inlined_at = Some(inlined_at);
-            }
-            None => {
-                // LLVM has a maximum discriminator that it can encode (currently
-                // it uses 12 bits for 4096 possible values). If we exceed that
-                // there is little we can do but drop the debug info.
-                debug_scope = None;
+                loc
             }
         }
-    }
+    });
 
-    debug_context.scopes[scope] = debug_scope;
+    debug_context.scopes[scope] = DebugScope {
+        dbg_scope,
+        inlined_at: inlined_at.or(parent_scope.inlined_at),
+        file_start_pos: loc.file.start_pos,
+        file_end_pos: loc.file.end_position(),
+    };
     instantiated.insert(scope);
 }
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs b/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs
new file mode 100644
index 00000000000..40842915222
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/dwarf_const.rs
@@ -0,0 +1,37 @@
+//! Definitions of various DWARF-related constants.
+
+use libc::c_uint;
+
+/// Helper macro to let us redeclare gimli's constants as our own constants
+/// with a different type, with less risk of copy-paste errors.
+macro_rules! declare_constant {
+    (
+        $name:ident : $type:ty
+    ) => {
+        #[allow(non_upper_case_globals)]
+        pub(crate) const $name: $type = ::gimli::constants::$name.0 as $type;
+
+        // Assert that as-cast probably hasn't changed the value.
+        const _: () = assert!($name as i128 == ::gimli::constants::$name.0 as i128);
+    };
+}
+
+declare_constant!(DW_TAG_const_type: c_uint);
+
+// DWARF languages.
+declare_constant!(DW_LANG_Rust: c_uint);
+
+// DWARF attribute type encodings.
+declare_constant!(DW_ATE_boolean: c_uint);
+declare_constant!(DW_ATE_float: c_uint);
+declare_constant!(DW_ATE_signed: c_uint);
+declare_constant!(DW_ATE_unsigned: c_uint);
+declare_constant!(DW_ATE_UTF: c_uint);
+
+// DWARF expression operators.
+declare_constant!(DW_OP_deref: u64);
+declare_constant!(DW_OP_plus_uconst: u64);
+/// Defined by LLVM in `llvm/include/llvm/BinaryFormat/Dwarf.h`.
+/// Double-checked by a static assertion in `RustWrapper.cpp`.
+#[allow(non_upper_case_globals)]
+pub(crate) const DW_OP_LLVM_fragment: u64 = 0x1000;
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
index 2c9f1cda13a..54c5d445f66 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
@@ -73,7 +73,7 @@ pub(crate) fn get_or_insert_gdb_debug_scripts_section_global<'ll>(
                 .define_global(section_var_name, llvm_type)
                 .unwrap_or_else(|| bug!("symbol `{}` is already defined", section_var_name));
             llvm::set_section(section_var, c".debug_gdb_scripts");
-            llvm::LLVMSetInitializer(section_var, cx.const_bytes(section_contents));
+            llvm::set_initializer(section_var, cx.const_bytes(section_contents));
             llvm::LLVMSetGlobalConstant(section_var, llvm::True);
             llvm::LLVMSetUnnamedAddress(section_var, llvm::UnnamedAddr::Global);
             llvm::set_linkage(section_var, llvm::Linkage::LinkOnceODRLinkage);
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 40248a9009a..59c3fe635d0 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -13,7 +13,7 @@ use rustc_hir::def_id::{DefId, LOCAL_CRATE};
 use rustc_middle::bug;
 use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{
-    self, AdtKind, CoroutineArgsExt, Instance, PolyExistentialTraitRef, Ty, TyCtxt, Visibility,
+    self, AdtKind, CoroutineArgsExt, ExistentialTraitRef, Instance, Ty, TyCtxt, Visibility,
 };
 use rustc_session::config::{self, DebugInfo, Lto};
 use rustc_span::{DUMMY_SP, FileName, FileNameDisplayPreference, SourceFile, Symbol, hygiene};
@@ -22,6 +22,7 @@ use rustc_target::spec::DebuginfoKind;
 use smallvec::smallvec;
 use tracing::{debug, instrument};
 
+pub(crate) use self::type_map::TypeMap;
 use self::type_map::{DINodeCreationResult, Stub, UniqueTypeId};
 use super::CodegenUnitDebugContext;
 use super::namespace::mangled_name_of_instance;
@@ -30,6 +31,7 @@ use super::utils::{
     DIB, create_DIArray, debug_context, get_namespace_for_item, is_node_local_to_unit,
 };
 use crate::common::{AsCCharPtr, CodegenCx};
+use crate::debuginfo::dwarf_const;
 use crate::debuginfo::metadata::type_map::build_type_with_children;
 use crate::debuginfo::utils::{WidePtrKind, wide_pointer_kind};
 use crate::llvm::debuginfo::{
@@ -59,20 +61,6 @@ impl fmt::Debug for llvm::Metadata {
     }
 }
 
-// From DWARF 5.
-// See http://www.dwarfstd.org/ShowIssue.php?issue=140129.1.
-const DW_LANG_RUST: c_uint = 0x1c;
-#[allow(non_upper_case_globals)]
-const DW_ATE_boolean: c_uint = 0x02;
-#[allow(non_upper_case_globals)]
-const DW_ATE_float: c_uint = 0x04;
-#[allow(non_upper_case_globals)]
-const DW_ATE_signed: c_uint = 0x05;
-#[allow(non_upper_case_globals)]
-const DW_ATE_unsigned: c_uint = 0x07;
-#[allow(non_upper_case_globals)]
-const DW_ATE_UTF: c_uint = 0x10;
-
 pub(super) const UNKNOWN_LINE_NUMBER: c_uint = 0;
 pub(super) const UNKNOWN_COLUMN_NUMBER: c_uint = 0;
 
@@ -87,8 +75,6 @@ type SmallVec<T> = smallvec::SmallVec<[T; 16]>;
 mod enums;
 mod type_map;
 
-pub(crate) use type_map::TypeMap;
-
 /// Returns from the enclosing function if the type debuginfo node with the given
 /// unique ID can be found in the type map.
 macro_rules! return_if_di_node_created_in_meantime {
@@ -333,19 +319,16 @@ fn build_subroutine_type_di_node<'ll, 'tcx>(
     // This is actually a function pointer, so wrap it in pointer DI.
     let name = compute_debuginfo_type_name(cx.tcx, fn_ty, false);
     let (size, align) = match fn_ty.kind() {
-        ty::FnDef(..) => (0, 1),
-        ty::FnPtr(..) => (
-            cx.tcx.data_layout.pointer_size.bits(),
-            cx.tcx.data_layout.pointer_align.abi.bits() as u32,
-        ),
+        ty::FnDef(..) => (Size::ZERO, Align::ONE),
+        ty::FnPtr(..) => (cx.tcx.data_layout.pointer_size, cx.tcx.data_layout.pointer_align.abi),
         _ => unreachable!(),
     };
     let di_node = unsafe {
         llvm::LLVMRustDIBuilderCreatePointerType(
             DIB(cx),
             fn_di_node,
-            size,
-            align,
+            size.bits(),
+            align.bits() as u32,
             0, // Ignore DWARF address space.
             name.as_c_char_ptr(),
             name.len(),
@@ -456,7 +439,7 @@ pub(crate) fn type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, t: Ty<'tcx>) ->
         // (or if there is no allocator argument).
         ty::Adt(def, args)
             if def.is_box()
-                && args.get(1).map_or(true, |arg| cx.layout_of(arg.expect_ty()).is_1zst()) =>
+                && args.get(1).is_none_or(|arg| cx.layout_of(arg.expect_ty()).is_1zst()) =>
         {
             build_pointer_or_reference_di_node(cx, t, t.expect_boxed_ty(), unique_type_id)
         }
@@ -519,7 +502,7 @@ fn recursion_marker_type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> &'ll D
                 name.as_c_char_ptr(),
                 name.len(),
                 cx.tcx.data_layout.pointer_size.bits(),
-                DW_ATE_unsigned,
+                dwarf_const::DW_ATE_unsigned,
             )
         }
     })
@@ -778,6 +761,8 @@ fn build_basic_type_di_node<'ll, 'tcx>(
     // .natvis visualizers (and perhaps other existing native debuggers?)
     let cpp_like_debuginfo = cpp_like_debuginfo(cx.tcx);
 
+    use dwarf_const::{DW_ATE_UTF, DW_ATE_boolean, DW_ATE_float, DW_ATE_signed, DW_ATE_unsigned};
+
     let (name, encoding) = match t.kind() {
         ty::Never => ("!", DW_ATE_unsigned),
         ty::Tuple(elements) if elements.is_empty() => {
@@ -931,8 +916,7 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
         .unwrap_or_default();
     let kind = DebugEmissionKind::from_generic(tcx.sess.opts.debuginfo);
 
-    let dwarf_version =
-        tcx.sess.opts.unstable_opts.dwarf_version.unwrap_or(tcx.sess.target.default_dwarf_version);
+    let dwarf_version = tcx.sess.dwarf_version();
     let is_dwarf_kind =
         matches!(tcx.sess.target.debuginfo_kind, DebuginfoKind::Dwarf | DebuginfoKind::DwarfDsym);
     // Don't emit `.debug_pubnames` and `.debug_pubtypes` on DWARFv4 or lower.
@@ -944,7 +928,7 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
 
     unsafe {
         let compile_unit_file = llvm::LLVMRustDIBuilderCreateFile(
-            debug_context.builder,
+            debug_context.builder.as_ref(),
             name_in_debuginfo.as_c_char_ptr(),
             name_in_debuginfo.len(),
             work_dir.as_c_char_ptr(),
@@ -957,8 +941,8 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
         );
 
         let unit_metadata = llvm::LLVMRustDIBuilderCreateCompileUnit(
-            debug_context.builder,
-            DW_LANG_RUST,
+            debug_context.builder.as_ref(),
+            dwarf_const::DW_LANG_Rust,
             compile_unit_file,
             producer.as_c_char_ptr(),
             producer.len(),
@@ -1412,7 +1396,7 @@ pub(crate) fn build_global_var_di_node<'ll>(
 fn build_vtable_type_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     ty: Ty<'tcx>,
-    poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+    poly_trait_ref: Option<ty::ExistentialTraitRef<'tcx>>,
 ) -> &'ll DIType {
     let tcx = cx.tcx;
 
@@ -1500,10 +1484,30 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
     .di_node
 }
 
+/// Get the global variable for the vtable.
+///
+/// When using global variables, we may have created an addrspacecast to get a pointer to the
+/// default address space if global variables are created in a different address space.
+/// For modifying the vtable, we need the real global variable. This function accepts either a
+/// global variable (which is simply returned), or an addrspacecast constant expression.
+/// If the given value is an addrspacecast, the cast is removed and the global variable behind
+/// the cast is returned.
+fn find_vtable_behind_cast<'ll>(vtable: &'ll Value) -> &'ll Value {
+    // The vtable is a global variable, which may be behind an addrspacecast.
+    unsafe {
+        if let Some(c) = llvm::LLVMIsAConstantExpr(vtable) {
+            if llvm::LLVMGetConstOpcode(c) == llvm::Opcode::AddrSpaceCast {
+                return llvm::LLVMGetOperand(c, 0).unwrap();
+            }
+        }
+    }
+    vtable
+}
+
 pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     ty: Ty<'tcx>,
-    trait_ref: Option<PolyExistentialTraitRef<'tcx>>,
+    trait_ref: Option<ExistentialTraitRef<'tcx>>,
     vtable: &'ll Value,
 ) {
     // FIXME(flip1995): The virtual function elimination optimization only works with full LTO in
@@ -1520,9 +1524,11 @@ pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>(
 
     let Some(trait_ref) = trait_ref else { return };
 
+    // Unwrap potential addrspacecast
+    let vtable = find_vtable_behind_cast(vtable);
     let trait_ref_self = trait_ref.with_self_ty(cx.tcx, ty);
     let trait_ref_self = cx.tcx.erase_regions(trait_ref_self);
-    let trait_def_id = trait_ref_self.def_id();
+    let trait_def_id = trait_ref_self.def_id;
     let trait_vis = cx.tcx.visibility(trait_def_id);
 
     let cgus = cx.sess().codegen_units().as_usize();
@@ -1581,7 +1587,7 @@ pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>(
 pub(crate) fn create_vtable_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     ty: Ty<'tcx>,
-    poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+    poly_trait_ref: Option<ty::ExistentialTraitRef<'tcx>>,
     vtable: &'ll Value,
 ) {
     if cx.dbg_cx.is_none() {
@@ -1593,6 +1599,9 @@ pub(crate) fn create_vtable_di_node<'ll, 'tcx>(
         return;
     }
 
+    // Unwrap potential addrspacecast
+    let vtable = find_vtable_behind_cast(vtable);
+
     // When full debuginfo is enabled, we want to try and prevent vtables from being
     // merged. Otherwise debuggers will have a hard time mapping from dyn pointer
     // to concrete type.
@@ -1629,7 +1638,14 @@ pub(crate) fn extend_scope_to_file<'ll>(
     file: &SourceFile,
 ) -> &'ll DILexicalBlock {
     let file_metadata = file_metadata(cx, file);
-    unsafe { llvm::LLVMRustDIBuilderCreateLexicalBlockFile(DIB(cx), scope_metadata, file_metadata) }
+    unsafe {
+        llvm::LLVMDIBuilderCreateLexicalBlockFile(
+            DIB(cx),
+            scope_metadata,
+            file_metadata,
+            /* Discriminator (default) */ 0u32,
+        )
+    }
 }
 
 fn tuple_field_name(field_index: usize) -> Cow<'static, str> {
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
index 23e11748e52..a72e205c9b2 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
@@ -12,6 +12,7 @@ use rustc_middle::ty::{self, AdtDef, CoroutineArgs, CoroutineArgsExt, Ty};
 use smallvec::smallvec;
 
 use crate::common::{AsCCharPtr, CodegenCx};
+use crate::debuginfo::dwarf_const::DW_TAG_const_type;
 use crate::debuginfo::metadata::enums::DiscrResult;
 use crate::debuginfo::metadata::type_map::{self, Stub, UniqueTypeId};
 use crate::debuginfo::metadata::{
@@ -566,22 +567,39 @@ fn build_variant_struct_wrapper_type_di_node<'ll, 'tcx>(
                 None,
             ));
 
-            let build_assoc_const =
-                |name: &str, type_di_node: &'ll DIType, value: u64, align: Align| unsafe {
-                    llvm::LLVMRustDIBuilderCreateStaticMemberType(
-                        DIB(cx),
-                        wrapper_struct_type_di_node,
-                        name.as_c_char_ptr(),
-                        name.len(),
-                        unknown_file_metadata(cx),
-                        UNKNOWN_LINE_NUMBER,
-                        type_di_node,
-                        DIFlags::FlagZero,
-                        Some(cx.const_u64(value)),
-                        align.bits() as u32,
-                    )
+            let build_assoc_const = |name: &str,
+                                     type_di_node_: &'ll DIType,
+                                     value: u64,
+                                     align: Align| unsafe {
+                // FIXME: Currently we force all DISCR_* values to be u64's as LLDB seems to have
+                // problems inspecting other value types. Since DISCR_* is typically only going to be
+                // directly inspected via the debugger visualizer - which compares it to the `tag` value
+                // (whose type is not modified at all) it shouldn't cause any real problems.
+                let (t_di, align) = if name == ASSOC_CONST_DISCR_NAME {
+                    (type_di_node_, align.bits() as u32)
+                } else {
+                    let ty_u64 = Ty::new_uint(cx.tcx, ty::UintTy::U64);
+                    (type_di_node(cx, ty_u64), Align::EIGHT.bits() as u32)
                 };
 
+                // must wrap type in a `const` modifier for LLDB to be able to inspect the value of the member
+                let field_type =
+                    llvm::LLVMRustDIBuilderCreateQualifiedType(DIB(cx), DW_TAG_const_type, t_di);
+
+                llvm::LLVMRustDIBuilderCreateStaticMemberType(
+                    DIB(cx),
+                    wrapper_struct_type_di_node,
+                    name.as_c_char_ptr(),
+                    name.len(),
+                    unknown_file_metadata(cx),
+                    UNKNOWN_LINE_NUMBER,
+                    field_type,
+                    DIFlags::FlagZero,
+                    Some(cx.const_u64(value)),
+                    align,
+                )
+            };
+
             // We also always have an associated constant for the discriminant value
             // of the variant.
             fields.push(build_assoc_const(
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
index a37e719d43f..af1d503ad6a 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
@@ -6,7 +6,7 @@ use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_macros::HashStable;
 use rustc_middle::bug;
-use rustc_middle::ty::{self, PolyExistentialTraitRef, Ty, TyCtxt};
+use rustc_middle::ty::{self, ExistentialTraitRef, Ty, TyCtxt};
 
 use super::{DefinitionLocation, SmallVec, UNKNOWN_LINE_NUMBER, unknown_file_metadata};
 use crate::common::{AsCCharPtr, CodegenCx};
@@ -44,7 +44,7 @@ pub(super) enum UniqueTypeId<'tcx> {
     /// The ID for the additional wrapper struct type describing an enum variant in CPP-like mode.
     VariantStructTypeCppLikeWrapper(Ty<'tcx>, VariantIdx, private::HiddenZst),
     /// The ID of the artificial type we create for VTables.
-    VTableTy(Ty<'tcx>, Option<PolyExistentialTraitRef<'tcx>>, private::HiddenZst),
+    VTableTy(Ty<'tcx>, Option<ExistentialTraitRef<'tcx>>, private::HiddenZst),
 }
 
 impl<'tcx> UniqueTypeId<'tcx> {
@@ -88,7 +88,7 @@ impl<'tcx> UniqueTypeId<'tcx> {
     pub(crate) fn for_vtable_ty(
         tcx: TyCtxt<'tcx>,
         self_type: Ty<'tcx>,
-        implemented_trait: Option<PolyExistentialTraitRef<'tcx>>,
+        implemented_trait: Option<ExistentialTraitRef<'tcx>>,
     ) -> Self {
         assert_eq!(
             self_type,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index fae698bea2a..17f2d5f4e73 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -2,6 +2,7 @@
 
 use std::cell::{OnceCell, RefCell};
 use std::ops::Range;
+use std::sync::Arc;
 use std::{iter, ptr};
 
 use libc::c_uint;
@@ -10,7 +11,6 @@ use rustc_codegen_ssa::debuginfo::type_names;
 use rustc_codegen_ssa::mir::debuginfo::VariableKind::*;
 use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext, VariableKind};
 use rustc_codegen_ssa::traits::*;
-use rustc_data_structures::sync::Lrc;
 use rustc_data_structures::unord::UnordMap;
 use rustc_hir::def_id::{DefId, DefIdMap};
 use rustc_index::IndexVec;
@@ -22,6 +22,7 @@ use rustc_session::config::{self, DebugInfo};
 use rustc_span::{
     BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId, Symbol,
 };
+use rustc_target::spec::DebuginfoKind;
 use smallvec::SmallVec;
 use tracing::debug;
 
@@ -33,12 +34,13 @@ use crate::builder::Builder;
 use crate::common::{AsCCharPtr, CodegenCx};
 use crate::llvm;
 use crate::llvm::debuginfo::{
-    DIArray, DIBuilder, DIFile, DIFlags, DILexicalBlock, DILocation, DISPFlags, DIScope, DIType,
+    DIArray, DIBuilderBox, DIFile, DIFlags, DILexicalBlock, DILocation, DISPFlags, DIScope, DIType,
     DIVariable,
 };
 use crate::value::Value;
 
 mod create_scope_map;
+mod dwarf_const;
 mod gdb;
 pub(crate) mod metadata;
 mod namespace;
@@ -47,6 +49,10 @@ mod utils;
 use self::create_scope_map::compute_mir_scopes;
 pub(crate) use self::metadata::build_global_var_di_node;
 
+// FIXME(Zalathar): These `DW_TAG_*` constants are fake values that were
+// removed from LLVM in 2015, and are only used by our own `RustWrapper.cpp`
+// to decide which C++ API to call. Instead, we should just have two separate
+// FFI functions and choose the correct one on the Rust side.
 #[allow(non_upper_case_globals)]
 const DW_TAG_auto_variable: c_uint = 0x100;
 #[allow(non_upper_case_globals)]
@@ -55,7 +61,7 @@ const DW_TAG_arg_variable: c_uint = 0x101;
 /// A context object for maintaining all state needed by the debuginfo module.
 pub(crate) struct CodegenUnitDebugContext<'ll, 'tcx> {
     llmod: &'ll llvm::Module,
-    builder: &'ll mut DIBuilder<'ll>,
+    builder: DIBuilderBox<'ll>,
     created_files: RefCell<UnordMap<Option<(StableSourceFileId, SourceFileHash)>, &'ll DIFile>>,
 
     type_map: metadata::TypeMap<'ll, 'tcx>,
@@ -63,18 +69,10 @@ pub(crate) struct CodegenUnitDebugContext<'ll, 'tcx> {
     recursion_marker_type: OnceCell<&'ll DIType>,
 }
 
-impl Drop for CodegenUnitDebugContext<'_, '_> {
-    fn drop(&mut self) {
-        unsafe {
-            llvm::LLVMRustDIBuilderDispose(&mut *(self.builder as *mut _));
-        }
-    }
-}
-
 impl<'ll, 'tcx> CodegenUnitDebugContext<'ll, 'tcx> {
     pub(crate) fn new(llmod: &'ll llvm::Module) -> Self {
         debug!("CodegenUnitDebugContext::new");
-        let builder = unsafe { llvm::LLVMRustDIBuilderCreate(llmod) };
+        let builder = DIBuilderBox::new(llmod);
         // DIBuilder inherits context from the module, so we'd better use the same one
         CodegenUnitDebugContext {
             llmod,
@@ -87,30 +85,36 @@ impl<'ll, 'tcx> CodegenUnitDebugContext<'ll, 'tcx> {
     }
 
     pub(crate) fn finalize(&self, sess: &Session) {
-        unsafe { llvm::LLVMRustDIBuilderFinalize(self.builder) };
-        if !sess.target.is_like_msvc {
-            // Debuginfo generation in LLVM by default uses a higher
-            // version of dwarf than macOS currently understands. We can
-            // instruct LLVM to emit an older version of dwarf, however,
-            // for macOS to understand. For more info see #11352
-            // This can be overridden using --llvm-opts -dwarf-version,N.
-            // Android has the same issue (#22398)
-            let dwarf_version =
-                sess.opts.unstable_opts.dwarf_version.unwrap_or(sess.target.default_dwarf_version);
-            llvm::add_module_flag_u32(
-                self.llmod,
-                llvm::ModuleFlagMergeBehavior::Warning,
-                "Dwarf Version",
-                dwarf_version,
-            );
-        } else {
-            // Indicate that we want CodeView debug information on MSVC
-            llvm::add_module_flag_u32(
-                self.llmod,
-                llvm::ModuleFlagMergeBehavior::Warning,
-                "CodeView",
-                1,
-            );
+        unsafe { llvm::LLVMDIBuilderFinalize(self.builder.as_ref()) };
+
+        match sess.target.debuginfo_kind {
+            DebuginfoKind::Dwarf | DebuginfoKind::DwarfDsym => {
+                // Debuginfo generation in LLVM by default uses a higher
+                // version of dwarf than macOS currently understands. We can
+                // instruct LLVM to emit an older version of dwarf, however,
+                // for macOS to understand. For more info see #11352
+                // This can be overridden using --llvm-opts -dwarf-version,N.
+                // Android has the same issue (#22398)
+                llvm::add_module_flag_u32(
+                    self.llmod,
+                    // In the case where multiple CGUs with different dwarf version
+                    // values are being merged together, such as with cross-crate
+                    // LTO, then we want to use the highest version of dwarf
+                    // we can. This matches Clang's behavior as well.
+                    llvm::ModuleFlagMergeBehavior::Max,
+                    "Dwarf Version",
+                    sess.dwarf_version(),
+                );
+            }
+            DebuginfoKind::Pdb => {
+                // Indicate that we want CodeView debug information
+                llvm::add_module_flag_u32(
+                    self.llmod,
+                    llvm::ModuleFlagMergeBehavior::Warning,
+                    "CodeView",
+                    1,
+                );
+            }
         }
 
         // Prevent bitcode readers from deleting the debug info.
@@ -152,29 +156,26 @@ impl<'ll> DebugInfoBuilderMethods for Builder<'_, 'll, '_> {
         indirect_offsets: &[Size],
         fragment: Option<Range<Size>>,
     ) {
+        use dwarf_const::{DW_OP_LLVM_fragment, DW_OP_deref, DW_OP_plus_uconst};
+
         // Convert the direct and indirect offsets and fragment byte range to address ops.
-        // FIXME(eddyb) use `const`s instead of getting the values via FFI,
-        // the values should match the ones in the DWARF standard anyway.
-        let op_deref = || unsafe { llvm::LLVMRustDIBuilderCreateOpDeref() };
-        let op_plus_uconst = || unsafe { llvm::LLVMRustDIBuilderCreateOpPlusUconst() };
-        let op_llvm_fragment = || unsafe { llvm::LLVMRustDIBuilderCreateOpLLVMFragment() };
         let mut addr_ops = SmallVec::<[u64; 8]>::new();
 
         if direct_offset.bytes() > 0 {
-            addr_ops.push(op_plus_uconst());
+            addr_ops.push(DW_OP_plus_uconst);
             addr_ops.push(direct_offset.bytes() as u64);
         }
         for &offset in indirect_offsets {
-            addr_ops.push(op_deref());
+            addr_ops.push(DW_OP_deref);
             if offset.bytes() > 0 {
-                addr_ops.push(op_plus_uconst());
+                addr_ops.push(DW_OP_plus_uconst);
                 addr_ops.push(offset.bytes() as u64);
             }
         }
         if let Some(fragment) = fragment {
             // `DW_OP_LLVM_fragment` takes as arguments the fragment's
             // offset and size, both of them in bits.
-            addr_ops.push(op_llvm_fragment());
+            addr_ops.push(DW_OP_LLVM_fragment);
             addr_ops.push(fragment.start.bits() as u64);
             addr_ops.push((fragment.end - fragment.start).bits() as u64);
         }
@@ -243,7 +244,7 @@ impl<'ll> DebugInfoBuilderMethods for Builder<'_, 'll, '_> {
 // `lookup_char_pos` return the right information instead.
 struct DebugLoc {
     /// Information about the original source file.
-    file: Lrc<SourceFile>,
+    file: Arc<SourceFile>,
     /// The (1-based) line number.
     line: u32,
     /// The (1-based) column number.
@@ -293,12 +294,12 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         }
 
         // Initialize fn debug context (including scopes).
-        let empty_scope = Some(DebugScope {
+        let empty_scope = DebugScope {
             dbg_scope: self.dbg_scope_fn(instance, fn_abi, Some(llfn)),
             inlined_at: None,
             file_start_pos: BytePos(0),
             file_end_pos: BytePos(0),
-        });
+        };
         let mut fn_debug_context = FunctionDebugContext {
             scopes: IndexVec::from_elem(empty_scope, &mir.source_scopes),
             inlined_function_scopes: Default::default(),
@@ -547,14 +548,17 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 }
             }
 
-            let scope = namespace::item_namespace(cx, DefId {
-                krate: instance.def_id().krate,
-                index: cx
-                    .tcx
-                    .def_key(instance.def_id())
-                    .parent
-                    .expect("get_containing_scope: missing parent?"),
-            });
+            let scope = namespace::item_namespace(
+                cx,
+                DefId {
+                    krate: instance.def_id().krate,
+                    index: cx
+                        .tcx
+                        .def_key(instance.def_id())
+                        .parent
+                        .expect("get_containing_scope: missing parent?"),
+                },
+            );
             (scope, false)
         }
     }
@@ -577,13 +581,13 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
             (line, col)
         };
 
-        unsafe { llvm::LLVMRustDIBuilderCreateDebugLocation(line, col, scope, inlined_at) }
+        unsafe { llvm::LLVMDIBuilderCreateDebugLocation(self.llcx, line, col, scope, inlined_at) }
     }
 
     fn create_vtable_debuginfo(
         &self,
         ty: Ty<'tcx>,
-        trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+        trait_ref: Option<ty::ExistentialTraitRef<'tcx>>,
         vtable: Self::Value,
     ) {
         metadata::create_vtable_di_node(self, ty, trait_ref, vtable)
@@ -636,7 +640,7 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 true,
                 DIFlags::FlagZero,
                 argument_index,
-                align.bytes() as u32,
+                align.bits() as u32,
             )
         }
     }
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/namespace.rs b/compiler/rustc_codegen_llvm/src/debuginfo/namespace.rs
index 33d9bc23890..b4d639368b0 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/namespace.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/namespace.rs
@@ -5,7 +5,7 @@ use rustc_hir::def_id::DefId;
 use rustc_middle::ty::{self, Instance};
 
 use super::utils::{DIB, debug_context};
-use crate::common::{AsCCharPtr, CodegenCx};
+use crate::common::CodegenCx;
 use crate::llvm;
 use crate::llvm::debuginfo::DIScope;
 
@@ -33,12 +33,12 @@ pub(crate) fn item_namespace<'ll>(cx: &CodegenCx<'ll, '_>, def_id: DefId) -> &'l
     };
 
     let scope = unsafe {
-        llvm::LLVMRustDIBuilderCreateNameSpace(
+        llvm::LLVMDIBuilderCreateNameSpace(
             DIB(cx),
             parent_scope,
-            namespace_name_string.as_c_char_ptr(),
+            namespace_name_string.as_ptr(),
             namespace_name_string.len(),
-            false, // ExportSymbols (only relevant for C++ anonymous namespaces)
+            llvm::False, // ExportSymbols (only relevant for C++ anonymous namespaces)
         )
     };
 
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/utils.rs b/compiler/rustc_codegen_llvm/src/debuginfo/utils.rs
index 6e841293477..cc1d504b430 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/utils.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/utils.rs
@@ -41,7 +41,7 @@ pub(crate) fn debug_context<'a, 'll, 'tcx>(
 #[inline]
 #[allow(non_snake_case)]
 pub(crate) fn DIB<'a, 'll>(cx: &'a CodegenCx<'ll, '_>) -> &'a DIBuilder<'ll> {
-    cx.dbg_cx.as_ref().unwrap().builder
+    cx.dbg_cx.as_ref().unwrap().builder.as_ref()
 }
 
 pub(crate) fn get_namespace_for_item<'ll>(cx: &CodegenCx<'ll, '_>, def_id: DefId) -> &'ll DIScope {
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index 3ec386f6b07..cebceef1b93 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -21,26 +21,26 @@ use tracing::debug;
 
 use crate::abi::{FnAbi, FnAbiLlvmExt};
 use crate::common::AsCCharPtr;
-use crate::context::CodegenCx;
+use crate::context::{CodegenCx, SimpleCx};
 use crate::llvm::AttributePlace::Function;
 use crate::llvm::Visibility;
 use crate::type_::Type;
 use crate::value::Value;
 use crate::{attributes, llvm};
 
-/// Declare a function.
+/// Declare a function with a SimpleCx.
 ///
 /// If there’s a value with the same name already declared, the function will
 /// update the declaration and return existing Value instead.
-pub(crate) fn declare_raw_fn<'ll>(
-    cx: &CodegenCx<'ll, '_>,
+pub(crate) fn declare_simple_fn<'ll>(
+    cx: &SimpleCx<'ll>,
     name: &str,
     callconv: llvm::CallConv,
     unnamed: llvm::UnnamedAddr,
     visibility: llvm::Visibility,
     ty: &'ll Type,
 ) -> &'ll Value {
-    debug!("declare_raw_fn(name={:?}, ty={:?})", name, ty);
+    debug!("declare_simple_fn(name={:?}, ty={:?})", name, ty);
     let llfn = unsafe {
         llvm::LLVMRustGetOrInsertFunction(cx.llmod, name.as_c_char_ptr(), name.len(), ty)
     };
@@ -49,6 +49,24 @@ pub(crate) fn declare_raw_fn<'ll>(
     llvm::SetUnnamedAddress(llfn, unnamed);
     llvm::set_visibility(llfn, visibility);
 
+    llfn
+}
+
+/// Declare a function.
+///
+/// If there’s a value with the same name already declared, the function will
+/// update the declaration and return existing Value instead.
+pub(crate) fn declare_raw_fn<'ll, 'tcx>(
+    cx: &CodegenCx<'ll, 'tcx>,
+    name: &str,
+    callconv: llvm::CallConv,
+    unnamed: llvm::UnnamedAddr,
+    visibility: llvm::Visibility,
+    ty: &'ll Type,
+) -> &'ll Value {
+    debug!("declare_raw_fn(name={:?}, ty={:?})", name, ty);
+    let llfn = declare_simple_fn(cx, name, callconv, unnamed, visibility, ty);
+
     let mut attrs = SmallVec::<[_; 4]>::new();
 
     if cx.tcx.sess.opts.cg.no_redzone.unwrap_or(cx.tcx.sess.target.disable_redzone) {
@@ -125,7 +143,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
         let llfn = declare_raw_fn(
             self,
             name,
-            fn_abi.llvm_cconv(),
+            fn_abi.llvm_cconv(self),
             llvm::UnnamedAddr::Global,
             llvm::Visibility::Default,
             fn_abi.llvm_type(self),
@@ -217,7 +235,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     /// name.
     pub(crate) fn get_defined_value(&self, name: &str) -> Option<&'ll Value> {
         self.get_declared_value(name).and_then(|val| {
-            let declaration = unsafe { llvm::LLVMIsDeclaration(val) != 0 };
+            let declaration = llvm::is_declaration(val);
             if !declaration { Some(val) } else { None }
         })
     }
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index f340b06e876..f4c9491f758 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -37,6 +37,7 @@ pub(crate) struct UnstableCTargetFeature<'a> {
 #[note(codegen_llvm_forbidden_ctarget_feature_issue)]
 pub(crate) struct ForbiddenCTargetFeature<'a> {
     pub feature: &'a str,
+    pub enabled: &'a str,
     pub reason: &'a str,
 }
 
@@ -214,12 +215,6 @@ pub(crate) struct MismatchedDataLayout<'a> {
 }
 
 #[derive(Diagnostic)]
-#[diag(codegen_llvm_invalid_target_feature_prefix)]
-pub(crate) struct InvalidTargetFeaturePrefix<'a> {
-    pub feature: &'a str,
-}
-
-#[derive(Diagnostic)]
 #[diag(codegen_llvm_fixed_x18_invalid_arch)]
 pub(crate) struct FixedX18InvalidArch<'a> {
     pub arch: &'a str,
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index cabcfc9b42b..8b976885904 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -333,12 +333,15 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     sym::prefetch_write_instruction => (1, 0),
                     _ => bug!(),
                 };
-                self.call_intrinsic("llvm.prefetch", &[
-                    args[0].immediate(),
-                    self.const_i32(rw),
-                    args[1].immediate(),
-                    self.const_i32(cache_type),
-                ])
+                self.call_intrinsic(
+                    "llvm.prefetch",
+                    &[
+                        args[0].immediate(),
+                        self.const_i32(rw),
+                        args[1].immediate(),
+                        self.const_i32(cache_type),
+                    ],
+                )
             }
             sym::carrying_mul_add => {
                 let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);
@@ -396,10 +399,10 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 match name {
                     sym::ctlz | sym::cttz => {
                         let y = self.const_bool(false);
-                        let ret = self.call_intrinsic(&format!("llvm.{name}.i{width}"), &[
-                            args[0].immediate(),
-                            y,
-                        ]);
+                        let ret = self.call_intrinsic(
+                            &format!("llvm.{name}.i{width}"),
+                            &[args[0].immediate(), y],
+                        );
 
                         self.intcast(ret, llret_ty, false)
                     }
@@ -416,24 +419,26 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                         self.intcast(ret, llret_ty, false)
                     }
                     sym::ctpop => {
-                        let ret = self.call_intrinsic(&format!("llvm.ctpop.i{width}"), &[
-                            args[0].immediate()
-                        ]);
+                        let ret = self.call_intrinsic(
+                            &format!("llvm.ctpop.i{width}"),
+                            &[args[0].immediate()],
+                        );
                         self.intcast(ret, llret_ty, false)
                     }
                     sym::bswap => {
                         if width == 8 {
                             args[0].immediate() // byte swap a u8/i8 is just a no-op
                         } else {
-                            self.call_intrinsic(&format!("llvm.bswap.i{width}"), &[
-                                args[0].immediate()
-                            ])
+                            self.call_intrinsic(
+                                &format!("llvm.bswap.i{width}"),
+                                &[args[0].immediate()],
+                            )
                         }
                     }
-                    sym::bitreverse => self
-                        .call_intrinsic(&format!("llvm.bitreverse.i{width}"), &[
-                            args[0].immediate()
-                        ]),
+                    sym::bitreverse => self.call_intrinsic(
+                        &format!("llvm.bitreverse.i{width}"),
+                        &[args[0].immediate()],
+                    ),
                     sym::rotate_left | sym::rotate_right => {
                         let is_left = name == sym::rotate_left;
                         let val = args[0].immediate();
@@ -500,11 +505,10 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
 
             sym::compare_bytes => {
                 // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
-                let cmp = self.call_intrinsic("memcmp", &[
-                    args[0].immediate(),
-                    args[1].immediate(),
-                    args[2].immediate(),
-                ]);
+                let cmp = self.call_intrinsic(
+                    "memcmp",
+                    &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
+                );
                 // Some targets have `memcmp` returning `i16`, but the intrinsic is always `i32`.
                 self.sext(cmp, self.type_ix(32))
             }
@@ -820,7 +824,7 @@ fn codegen_msvc_try<'ll>(
         if bx.cx.tcx.sess.target.supports_comdat() {
             llvm::SetUniqueComdat(bx.llmod, tydesc);
         }
-        unsafe { llvm::LLVMSetInitializer(tydesc, type_info) };
+        llvm::set_initializer(tydesc, type_info);
 
         // The flag value of 8 indicates that we are catching the exception by
         // reference instead of by value. We can't use catch by value because
@@ -1081,11 +1085,11 @@ fn codegen_emcc_try<'ll>(
 
 // Helper function to give a Block to a closure to codegen a shim function.
 // This is currently primarily used for the `try` intrinsic functions above.
-fn gen_fn<'ll, 'tcx>(
-    cx: &CodegenCx<'ll, 'tcx>,
+fn gen_fn<'a, 'll, 'tcx>(
+    cx: &'a CodegenCx<'ll, 'tcx>,
     name: &str,
     rust_fn_sig: ty::PolyFnSig<'tcx>,
-    codegen: &mut dyn FnMut(Builder<'_, 'll, 'tcx>),
+    codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
 ) -> (&'ll Type, &'ll Value) {
     let fn_abi = cx.fn_abi_of_fn_ptr(rust_fn_sig, ty::List::empty());
     let llty = fn_abi.llvm_type(cx);
@@ -1104,9 +1108,9 @@ fn gen_fn<'ll, 'tcx>(
 // catch exceptions.
 //
 // This function is only generated once and is then cached.
-fn get_rust_try_fn<'ll, 'tcx>(
-    cx: &CodegenCx<'ll, 'tcx>,
-    codegen: &mut dyn FnMut(Builder<'_, 'll, 'tcx>),
+fn get_rust_try_fn<'a, 'll, 'tcx>(
+    cx: &'a CodegenCx<'ll, 'tcx>,
+    codegen: &mut dyn FnMut(Builder<'a, 'll, 'tcx>),
 ) -> (&'ll Type, &'ll Value) {
     if let Some(llfn) = cx.rust_try_fn.get() {
         return llfn;
@@ -1182,6 +1186,60 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         }};
     }
 
+    /// Returns the bitwidth of the `$ty` argument if it is an `Int` type.
+    macro_rules! require_int_ty {
+        ($ty: expr, $diag: expr) => {
+            match $ty {
+                ty::Int(i) => i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
+                _ => {
+                    return_error!($diag);
+                }
+            }
+        };
+    }
+
+    /// Returns the bitwidth of the `$ty` argument if it is an `Int` or `Uint` type.
+    macro_rules! require_int_or_uint_ty {
+        ($ty: expr, $diag: expr) => {
+            match $ty {
+                ty::Int(i) => i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
+                ty::Uint(i) => {
+                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                }
+                _ => {
+                    return_error!($diag);
+                }
+            }
+        };
+    }
+
+    /// Converts a vector mask, where each element has a bit width equal to the data elements it is used with,
+    /// down to an i1 based mask that can be used by llvm intrinsics.
+    ///
+    /// The rust simd semantics are that each element should either consist of all ones or all zeroes,
+    /// but this information is not available to llvm. Truncating the vector effectively uses the lowest bit,
+    /// but codegen for several targets is better if we consider the highest bit by shifting.
+    ///
+    /// For x86 SSE/AVX targets this is beneficial since most instructions with mask parameters only consider the highest bit.
+    /// So even though on llvm level we have an additional shift, in the final assembly there is no shift or truncate and
+    /// instead the mask can be used as is.
+    ///
+    /// For aarch64 and other targets there is a benefit because a mask from the sign bit can be more
+    /// efficiently converted to an all ones / all zeroes mask by comparing whether each element is negative.
+    fn vector_mask_to_bitmask<'a, 'll, 'tcx>(
+        bx: &mut Builder<'a, 'll, 'tcx>,
+        i_xn: &'ll Value,
+        in_elem_bitwidth: u64,
+        in_len: u64,
+    ) -> &'ll Value {
+        // Shift the MSB to the right by "in_elem_bitwidth - 1" into the first bit position.
+        let shift_idx = bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
+        let shift_indices = vec![shift_idx; in_len as _];
+        let i_xn_msb = bx.lshr(i_xn, bx.const_vector(shift_indices.as_slice()));
+        // Truncate vector to an <i1 x N>
+        bx.trunc(i_xn_msb, bx.type_vector(bx.type_i1(), in_len))
+    }
+
     let tcx = bx.tcx();
     let sig = tcx.normalize_erasing_late_bound_regions(bx.typing_env(), callee_ty.fn_sig(tcx));
     let arg_tys = sig.inputs();
@@ -1251,14 +1309,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     if let Some(cmp_op) = comparison {
         let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
 
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
         require!(
             bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
             InvalidMonomorphization::ReturnIntegerType { span, name, ret_ty, out_ty }
@@ -1275,25 +1336,18 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     }
 
     if name == sym::simd_shuffle_generic {
-        let idx = fn_args[2].expect_const().try_to_valtree().unwrap().0.unwrap_branch();
+        let idx = fn_args[2].expect_const().to_value().valtree.unwrap_branch();
         let n = idx.len() as u64;
 
         let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
-        require!(out_len == n, InvalidMonomorphization::ReturnLength {
-            span,
-            name,
-            in_len: n,
-            ret_ty,
-            out_len
-        });
-        require!(in_elem == out_ty, InvalidMonomorphization::ReturnElement {
-            span,
-            name,
-            in_elem,
-            in_ty,
-            ret_ty,
-            out_ty
-        });
+        require!(
+            out_len == n,
+            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
+        );
+        require!(
+            in_elem == out_ty,
+            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
+        );
 
         let total_len = in_len * 2;
 
@@ -1338,21 +1392,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         };
 
         let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
-        require!(out_len == n, InvalidMonomorphization::ReturnLength {
-            span,
-            name,
-            in_len: n,
-            ret_ty,
-            out_len
-        });
-        require!(in_elem == out_ty, InvalidMonomorphization::ReturnElement {
-            span,
-            name,
-            in_elem,
-            in_ty,
-            ret_ty,
-            out_ty
-        });
+        require!(
+            out_len == n,
+            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
+        );
+        require!(
+            in_elem == out_ty,
+            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
+        );
 
         let total_len = u128::from(in_len) * 2;
 
@@ -1377,13 +1424,16 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     }
 
     if name == sym::simd_insert {
-        require!(in_elem == arg_tys[2], InvalidMonomorphization::InsertedType {
-            span,
-            name,
-            in_elem,
-            in_ty,
-            out_ty: arg_tys[2]
-        });
+        require!(
+            in_elem == arg_tys[2],
+            InvalidMonomorphization::InsertedType {
+                span,
+                name,
+                in_elem,
+                in_ty,
+                out_ty: arg_tys[2]
+            }
+        );
         let idx = bx
             .const_to_opt_u128(args[1].immediate(), false)
             .expect("typeck should have ensure that this is a const");
@@ -1402,13 +1452,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         ));
     }
     if name == sym::simd_extract {
-        require!(ret_ty == in_elem, InvalidMonomorphization::ReturnType {
-            span,
-            name,
-            in_elem,
-            in_ty,
-            ret_ty
-        });
+        require!(
+            ret_ty == in_elem,
+            InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
+        );
         let idx = bx
             .const_to_opt_u128(args[1].immediate(), false)
             .expect("typeck should have ensure that this is a const");
@@ -1427,20 +1474,15 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let m_elem_ty = in_elem;
         let m_len = in_len;
         let (v_len, _) = require_simd!(arg_tys[1], SimdArgument);
-        require!(m_len == v_len, InvalidMonomorphization::MismatchedLengths {
-            span,
-            name,
-            m_len,
-            v_len
-        });
-        match m_elem_ty.kind() {
-            ty::Int(_) => {}
-            _ => return_error!(InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }),
-        }
-        // truncate the mask to a vector of i1s
-        let i1 = bx.type_i1();
-        let i1xn = bx.type_vector(i1, m_len as u64);
-        let m_i1s = bx.trunc(args[0].immediate(), i1xn);
+        require!(
+            m_len == v_len,
+            InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
+        );
+        let in_elem_bitwidth = require_int_ty!(
+            m_elem_ty.kind(),
+            InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }
+        );
+        let m_i1s = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, m_len);
         return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
     }
 
@@ -1457,33 +1499,12 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let expected_bytes = in_len.div_ceil(8);
 
         // Integer vector <i{in_bitwidth} x in_len>:
-        let (i_xn, in_elem_bitwidth) = match in_elem.kind() {
-            ty::Int(i) => (
-                args[0].immediate(),
-                i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
-            ),
-            ty::Uint(i) => (
-                args[0].immediate(),
-                i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
-            ),
-            _ => return_error!(InvalidMonomorphization::VectorArgument {
-                span,
-                name,
-                in_ty,
-                in_elem
-            }),
-        };
+        let in_elem_bitwidth = require_int_or_uint_ty!(
+            in_elem.kind(),
+            InvalidMonomorphization::VectorArgument { span, name, in_ty, in_elem }
+        );
 
-        // LLVM doesn't always know the inputs are `0` or `!0`, so we shift here so it optimizes to
-        // `pmovmskb` and similar on x86.
-        let shift_indices =
-            vec![
-                bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _);
-                in_len as _
-            ];
-        let i_xn_msb = bx.lshr(i_xn, bx.const_vector(shift_indices.as_slice()));
-        // Truncate vector to an <i1 x N>
-        let i1xn = bx.trunc(i_xn_msb, bx.type_vector(bx.type_i1(), in_len));
+        let i1xn = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, in_len);
         // Bitcast <i1 x N> to iN:
         let i_ = bx.bitcast(i1xn, bx.type_ix(in_len));
 
@@ -1663,30 +1684,34 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         require_simd!(ret_ty, SimdReturn);
 
         // Of the same length:
-        require!(in_len == out_len, InvalidMonomorphization::SecondArgumentLength {
-            span,
-            name,
-            in_len,
-            in_ty,
-            arg_ty: arg_tys[1],
-            out_len
-        });
-        require!(in_len == out_len2, InvalidMonomorphization::ThirdArgumentLength {
-            span,
-            name,
-            in_len,
-            in_ty,
-            arg_ty: arg_tys[2],
-            out_len: out_len2
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::SecondArgumentLength {
+                span,
+                name,
+                in_len,
+                in_ty,
+                arg_ty: arg_tys[1],
+                out_len
+            }
+        );
+        require!(
+            in_len == out_len2,
+            InvalidMonomorphization::ThirdArgumentLength {
+                span,
+                name,
+                in_len,
+                in_ty,
+                arg_ty: arg_tys[2],
+                out_len: out_len2
+            }
+        );
 
         // The return type must match the first argument type
-        require!(ret_ty == in_ty, InvalidMonomorphization::ExpectedReturnType {
-            span,
-            name,
-            in_ty,
-            ret_ty
-        });
+        require!(
+            ret_ty == in_ty,
+            InvalidMonomorphization::ExpectedReturnType { span, name, in_ty, ret_ty }
+        );
 
         require!(
             matches!(
@@ -1704,28 +1729,23 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        match element_ty2.kind() {
-            ty::Int(_) => (),
-            _ => {
-                return_error!(InvalidMonomorphization::ThirdArgElementType {
-                    span,
-                    name,
-                    expected_element: element_ty2,
-                    third_arg: arg_tys[2]
-                });
+        let mask_elem_bitwidth = require_int_ty!(
+            element_ty2.kind(),
+            InvalidMonomorphization::ThirdArgElementType {
+                span,
+                name,
+                expected_element: element_ty2,
+                third_arg: arg_tys[2]
             }
-        }
+        );
 
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
 
         // Truncate the mask vector to a vector of i1s:
-        let (mask, mask_ty) = {
-            let i1 = bx.type_i1();
-            let i1xn = bx.type_vector(i1, in_len);
-            (bx.trunc(args[2].immediate(), i1xn), i1xn)
-        };
+        let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
+        let mask_ty = bx.type_vector(bx.type_i1(), in_len);
 
         // Type of the vector of pointers:
         let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
@@ -1777,22 +1797,23 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         require_simd!(ret_ty, SimdReturn);
 
         // Of the same length:
-        require!(values_len == mask_len, InvalidMonomorphization::ThirdArgumentLength {
-            span,
-            name,
-            in_len: mask_len,
-            in_ty: mask_ty,
-            arg_ty: values_ty,
-            out_len: values_len
-        });
+        require!(
+            values_len == mask_len,
+            InvalidMonomorphization::ThirdArgumentLength {
+                span,
+                name,
+                in_len: mask_len,
+                in_ty: mask_ty,
+                arg_ty: values_ty,
+                out_len: values_len
+            }
+        );
 
         // The return type must match the last argument type
-        require!(ret_ty == values_ty, InvalidMonomorphization::ExpectedReturnType {
-            span,
-            name,
-            in_ty: values_ty,
-            ret_ty
-        });
+        require!(
+            ret_ty == values_ty,
+            InvalidMonomorphization::ExpectedReturnType { span, name, in_ty: values_ty, ret_ty }
+        );
 
         require!(
             matches!(
@@ -1810,8 +1831,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        require!(
-            matches!(mask_elem.kind(), ty::Int(_)),
+        let m_elem_bitwidth = require_int_ty!(
+            mask_elem.kind(),
             InvalidMonomorphization::ThirdArgElementType {
                 span,
                 name,
@@ -1820,17 +1841,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
+        let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
+        let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
+
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
 
-        // Truncate the mask vector to a vector of i1s:
-        let (mask, mask_ty) = {
-            let i1 = bx.type_i1();
-            let i1xn = bx.type_vector(i1, mask_len);
-            (bx.trunc(args[0].immediate(), i1xn), i1xn)
-        };
-
         let llvm_pointer = bx.type_ptr();
 
         // Type of the vector of elements:
@@ -1874,14 +1891,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let (values_len, values_elem) = require_simd!(values_ty, SimdThird);
 
         // Of the same length:
-        require!(values_len == mask_len, InvalidMonomorphization::ThirdArgumentLength {
-            span,
-            name,
-            in_len: mask_len,
-            in_ty: mask_ty,
-            arg_ty: values_ty,
-            out_len: values_len
-        });
+        require!(
+            values_len == mask_len,
+            InvalidMonomorphization::ThirdArgumentLength {
+                span,
+                name,
+                in_len: mask_len,
+                in_ty: mask_ty,
+                arg_ty: values_ty,
+                out_len: values_len
+            }
+        );
 
         // The second argument must be a mutable pointer type matching the element type
         require!(
@@ -1901,8 +1921,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        require!(
-            matches!(mask_elem.kind(), ty::Int(_)),
+        let m_elem_bitwidth = require_int_ty!(
+            mask_elem.kind(),
             InvalidMonomorphization::ThirdArgElementType {
                 span,
                 name,
@@ -1911,17 +1931,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
+        let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
+        let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
+
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
 
-        // Truncate the mask vector to a vector of i1s:
-        let (mask, mask_ty) = {
-            let i1 = bx.type_i1();
-            let i1xn = bx.type_vector(i1, in_len);
-            (bx.trunc(args[0].immediate(), i1xn), i1xn)
-        };
-
         let ret_t = bx.type_void();
 
         let llvm_pointer = bx.type_ptr();
@@ -1960,22 +1976,28 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let (element_len2, element_ty2) = require_simd!(arg_tys[2], SimdThird);
 
         // Of the same length:
-        require!(in_len == element_len1, InvalidMonomorphization::SecondArgumentLength {
-            span,
-            name,
-            in_len,
-            in_ty,
-            arg_ty: arg_tys[1],
-            out_len: element_len1
-        });
-        require!(in_len == element_len2, InvalidMonomorphization::ThirdArgumentLength {
-            span,
-            name,
-            in_len,
-            in_ty,
-            arg_ty: arg_tys[2],
-            out_len: element_len2
-        });
+        require!(
+            in_len == element_len1,
+            InvalidMonomorphization::SecondArgumentLength {
+                span,
+                name,
+                in_len,
+                in_ty,
+                arg_ty: arg_tys[1],
+                out_len: element_len1
+            }
+        );
+        require!(
+            in_len == element_len2,
+            InvalidMonomorphization::ThirdArgumentLength {
+                span,
+                name,
+                in_len,
+                in_ty,
+                arg_ty: arg_tys[2],
+                out_len: element_len2
+            }
+        );
 
         require!(
             matches!(
@@ -1995,28 +2017,23 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         // The element type of the third argument must be a signed integer type of any width:
-        match element_ty2.kind() {
-            ty::Int(_) => (),
-            _ => {
-                return_error!(InvalidMonomorphization::ThirdArgElementType {
-                    span,
-                    name,
-                    expected_element: element_ty2,
-                    third_arg: arg_tys[2]
-                });
+        let mask_elem_bitwidth = require_int_ty!(
+            element_ty2.kind(),
+            InvalidMonomorphization::ThirdArgElementType {
+                span,
+                name,
+                expected_element: element_ty2,
+                third_arg: arg_tys[2]
             }
-        }
+        );
 
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
 
         // Truncate the mask vector to a vector of i1s:
-        let (mask, mask_ty) = {
-            let i1 = bx.type_i1();
-            let i1xn = bx.type_vector(i1, in_len);
-            (bx.trunc(args[2].immediate(), i1xn), i1xn)
-        };
+        let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
+        let mask_ty = bx.type_vector(bx.type_i1(), in_len);
 
         let ret_t = bx.type_void();
 
@@ -2049,13 +2066,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         ($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident,
          $identity:expr) => {
             if name == sym::$name {
-                require!(ret_ty == in_elem, InvalidMonomorphization::ReturnType {
-                    span,
-                    name,
-                    in_elem,
-                    in_ty,
-                    ret_ty
-                });
+                require!(
+                    ret_ty == in_elem,
+                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
+                );
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
                         let r = bx.$integer_reduce(args[0].immediate());
@@ -2124,13 +2138,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     macro_rules! minmax_red {
         ($name:ident: $int_red:ident, $float_red:ident) => {
             if name == sym::$name {
-                require!(ret_ty == in_elem, InvalidMonomorphization::ReturnType {
-                    span,
-                    name,
-                    in_elem,
-                    in_ty,
-                    ret_ty
-                });
+                require!(
+                    ret_ty == in_elem,
+                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
+                );
                 return match in_elem.kind() {
                     ty::Int(_i) => Ok(bx.$int_red(args[0].immediate(), true)),
                     ty::Uint(_u) => Ok(bx.$int_red(args[0].immediate(), false)),
@@ -2155,17 +2166,19 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         ($name:ident : $red:ident, $boolean:expr) => {
             if name == sym::$name {
                 let input = if !$boolean {
-                    require!(ret_ty == in_elem, InvalidMonomorphization::ReturnType {
-                        span,
-                        name,
-                        in_elem,
-                        in_ty,
-                        ret_ty
-                    });
+                    require!(
+                        ret_ty == in_elem,
+                        InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
+                    );
                     args[0].immediate()
                 } else {
-                    match in_elem.kind() {
-                        ty::Int(_) | ty::Uint(_) => {}
+                    let bitwidth = match in_elem.kind() {
+                        ty::Int(i) => {
+                            i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                        }
+                        ty::Uint(i) => {
+                            i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                        }
                         _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                             span,
                             name,
@@ -2174,12 +2187,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                             in_elem,
                             ret_ty
                         }),
-                    }
+                    };
 
-                    // boolean reductions operate on vectors of i1s:
-                    let i1 = bx.type_i1();
-                    let i1xn = bx.type_vector(i1, in_len as u64);
-                    bx.trunc(args[0].immediate(), i1xn)
+                    vector_mask_to_bitmask(bx, args[0].immediate(), bitwidth, in_len as _)
                 };
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
@@ -2207,25 +2217,27 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     if name == sym::simd_cast_ptr {
         let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
 
         match in_elem.kind() {
             ty::RawPtr(p_ty, _) => {
                 let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
                     bx.tcx.normalize_erasing_regions(bx.typing_env(), ty)
                 });
-                require!(metadata.is_unit(), InvalidMonomorphization::CastWidePointer {
-                    span,
-                    name,
-                    ty: in_elem
-                });
+                require!(
+                    metadata.is_unit(),
+                    InvalidMonomorphization::CastWidePointer { span, name, ty: in_elem }
+                );
             }
             _ => {
                 return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
@@ -2236,11 +2248,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                 let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
                     bx.tcx.normalize_erasing_regions(bx.typing_env(), ty)
                 });
-                require!(metadata.is_unit(), InvalidMonomorphization::CastWidePointer {
-                    span,
-                    name,
-                    ty: out_elem
-                });
+                require!(
+                    metadata.is_unit(),
+                    InvalidMonomorphization::CastWidePointer { span, name, ty: out_elem }
+                );
             }
             _ => {
                 return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
@@ -2252,14 +2263,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     if name == sym::simd_expose_provenance {
         let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
 
         match in_elem.kind() {
             ty::RawPtr(_, _) => {}
@@ -2277,14 +2291,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     if name == sym::simd_with_exposed_provenance {
         let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
 
         match in_elem.kind() {
             ty::Uint(ty::UintTy::Usize) => {}
@@ -2302,14 +2319,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     if name == sym::simd_cast || name == sym::simd_as {
         let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
         // casting cares about nominal type, not just structural type
         if in_elem == out_elem {
             return Ok(args[0].immediate());
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 06afe8bb3ad..bcb1cc72b56 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -13,6 +13,7 @@
 #![feature(extern_types)]
 #![feature(file_buffered)]
 #![feature(hash_raw_entry)]
+#![feature(if_let_guard)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
@@ -29,7 +30,7 @@ use std::mem::ManuallyDrop;
 use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
 use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
-pub use llvm_util::target_features_cfg;
+pub(crate) use llvm_util::target_features_cfg;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
@@ -71,14 +72,9 @@ mod debuginfo;
 mod declare;
 mod errors;
 mod intrinsic;
-
-// The following is a workaround that replaces `pub mod llvm;` and that fixes issue 53912.
-#[path = "llvm/mod.rs"]
-mod llvm_;
-pub mod llvm {
-    pub use super::llvm_::*;
-}
-
+// FIXME(Zalathar): Fix all the unreachable-pub warnings that would occur if
+// this isn't pub, then make it not pub.
+pub mod llvm;
 mod llvm_util;
 mod mono_item;
 mod type_;
@@ -237,7 +233,6 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     /// Generate autodiff rules
     fn autodiff(
         cgcx: &CodegenContext<Self>,
-        tcx: TyCtxt<'_>,
         module: &ModuleCodegen<Self::Module>,
         diff_fncs: Vec<AutoDiffItem>,
         config: &ModuleConfig,
@@ -246,7 +241,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
             let dcx = cgcx.create_dcx();
             return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutLTO));
         }
-        builder::autodiff::differentiate(module, cgcx, tcx, diff_fncs, config)
+        builder::autodiff::differentiate(module, cgcx, diff_fncs, config)
     }
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index 729d6f62e24..92b0ce8ffe1 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -4,16 +4,23 @@ use libc::{c_char, c_uint};
 
 use super::ffi::{BasicBlock, Metadata, Module, Type, Value};
 use crate::llvm::Bool;
+
+#[link(name = "llvm-wrapper", kind = "static")]
 extern "C" {
     // Enzyme
     pub fn LLVMRustHasMetadata(I: &Value, KindID: c_uint) -> bool;
-    pub fn LLVMRustEraseInstBefore(BB: &BasicBlock, I: &Value);
+    pub fn LLVMRustEraseInstUntilInclusive(BB: &BasicBlock, I: &Value);
     pub fn LLVMRustGetLastInstruction<'a>(BB: &BasicBlock) -> Option<&'a Value>;
     pub fn LLVMRustDIGetInstMetadata(I: &Value) -> Option<&Metadata>;
     pub fn LLVMRustEraseInstFromParent(V: &Value);
     pub fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value;
     pub fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
+}
 
+extern "C" {
+    // Enzyme
+    pub fn LLVMDumpModule(M: &Module);
+    pub fn LLVMDumpValue(V: &Value);
     pub fn LLVMGetFunctionCallConv(F: &Value) -> c_uint;
     pub fn LLVMGetReturnType(T: &Type) -> &Type;
     pub fn LLVMGetParams(Fnc: &Value, parms: *mut &Value);
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 472d4a3a72b..441d144ce50 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1,3 +1,15 @@
+//! Bindings to the LLVM-C API (`LLVM*`), and to our own `extern "C"` wrapper
+//! functions around the unstable LLVM C++ API (`LLVMRust*`).
+//!
+//! ## Passing pointer/length strings as `*const c_uchar`
+//!
+//! Normally it's a good idea for Rust-side bindings to match the corresponding
+//! C-side function declarations as closely as possible. But when passing `&str`
+//! or `&[u8]` data as a pointer/length pair, it's more convenient to declare
+//! the Rust-side pointer as `*const c_uchar` instead of `*const c_char`.
+//! Both pointer types have the same ABI, and using `*const c_uchar` avoids
+//! the need for an extra cast from `*const u8` on the Rust side.
+
 #![allow(non_camel_case_types)]
 #![allow(non_upper_case_globals)]
 
@@ -5,17 +17,18 @@ use std::fmt::Debug;
 use std::marker::PhantomData;
 use std::ptr;
 
-use libc::{c_char, c_int, c_uint, c_ulonglong, c_void, size_t};
+use bitflags::bitflags;
+use libc::{c_char, c_int, c_uchar, c_uint, c_ulonglong, c_void, size_t};
 use rustc_macros::TryFromU32;
 use rustc_target::spec::SymbolVisibility;
 
 use super::RustString;
 use super::debuginfo::{
     DIArray, DIBasicType, DIBuilder, DICompositeType, DIDerivedType, DIDescriptor, DIEnumerator,
-    DIFile, DIFlags, DIGlobalVariableExpression, DILexicalBlock, DILocation, DINameSpace,
-    DISPFlags, DIScope, DISubprogram, DISubrange, DITemplateTypeParameter, DIType, DIVariable,
-    DebugEmissionKind, DebugNameTableKind,
+    DIFile, DIFlags, DIGlobalVariableExpression, DILocation, DISPFlags, DIScope, DISubprogram,
+    DISubrange, DITemplateTypeParameter, DIType, DIVariable, DebugEmissionKind, DebugNameTableKind,
 };
+use crate::llvm;
 
 /// In the LLVM-C API, boolean values are passed as `typedef int LLVMBool`,
 /// which has a different ABI from Rust or C++ `bool`.
@@ -56,19 +69,6 @@ pub enum LLVMRustResult {
     Failure,
 }
 
-/// Translation of LLVM's MachineTypes enum, defined in llvm\include\llvm\BinaryFormat\COFF.h.
-///
-/// We include only architectures supported on Windows.
-#[derive(Copy, Clone, PartialEq)]
-#[repr(C)]
-pub enum LLVMMachineType {
-    AMD64 = 0x8664,
-    I386 = 0x14c,
-    ARM64 = 0xaa64,
-    ARM64EC = 0xa641,
-    ARM = 0x01c0,
-}
-
 /// Must match the layout of `LLVMRustModuleFlagMergeBehavior`.
 ///
 /// When merging modules (e.g. during LTO), their metadata flags are combined. Conflicts are
@@ -120,6 +120,7 @@ pub enum CallConv {
     X86_Intr = 83,
     AvrNonBlockingInterrupt = 84,
     AvrInterrupt = 85,
+    AmdgpuKernel = 91,
 }
 
 /// Must match the layout of `LLVMLinkage`.
@@ -631,16 +632,6 @@ pub enum ThreadLocalMode {
     LocalExec,
 }
 
-/// LLVMRustTailCallKind
-#[derive(Copy, Clone)]
-#[repr(C)]
-pub enum TailCallKind {
-    None,
-    Tail,
-    MustTail,
-    NoTail,
-}
-
 /// LLVMRustChecksumKind
 #[derive(Copy, Clone)]
 #[repr(C)]
@@ -660,6 +651,79 @@ pub enum MemoryEffects {
     InaccessibleMemOnly,
 }
 
+/// LLVMOpcode
+#[derive(Copy, Clone, PartialEq, Eq)]
+#[repr(C)]
+pub enum Opcode {
+    Ret = 1,
+    Br = 2,
+    Switch = 3,
+    IndirectBr = 4,
+    Invoke = 5,
+    Unreachable = 7,
+    CallBr = 67,
+    FNeg = 66,
+    Add = 8,
+    FAdd = 9,
+    Sub = 10,
+    FSub = 11,
+    Mul = 12,
+    FMul = 13,
+    UDiv = 14,
+    SDiv = 15,
+    FDiv = 16,
+    URem = 17,
+    SRem = 18,
+    FRem = 19,
+    Shl = 20,
+    LShr = 21,
+    AShr = 22,
+    And = 23,
+    Or = 24,
+    Xor = 25,
+    Alloca = 26,
+    Load = 27,
+    Store = 28,
+    GetElementPtr = 29,
+    Trunc = 30,
+    ZExt = 31,
+    SExt = 32,
+    FPToUI = 33,
+    FPToSI = 34,
+    UIToFP = 35,
+    SIToFP = 36,
+    FPTrunc = 37,
+    FPExt = 38,
+    PtrToInt = 39,
+    IntToPtr = 40,
+    BitCast = 41,
+    AddrSpaceCast = 60,
+    ICmp = 42,
+    FCmp = 43,
+    PHI = 44,
+    Call = 45,
+    Select = 46,
+    UserOp1 = 47,
+    UserOp2 = 48,
+    VAArg = 49,
+    ExtractElement = 50,
+    InsertElement = 51,
+    ShuffleVector = 52,
+    ExtractValue = 53,
+    InsertValue = 54,
+    Freeze = 68,
+    Fence = 55,
+    AtomicCmpXchg = 56,
+    AtomicRMW = 57,
+    Resume = 58,
+    LandingPad = 59,
+    CleanupRet = 61,
+    CatchRet = 62,
+    CatchPad = 63,
+    CleanupPad = 64,
+    CatchSwitch = 65,
+}
+
 unsafe extern "C" {
     type Opaque;
 }
@@ -686,7 +750,6 @@ pub struct Builder<'a>(InvariantOpaque<'a>);
 #[repr(C)]
 pub struct PassManager<'a>(InvariantOpaque<'a>);
 unsafe extern "C" {
-    pub type Pass;
     pub type TargetMachine;
     pub type Archive;
 }
@@ -712,15 +775,52 @@ unsafe extern "C" {
 }
 
 pub type DiagnosticHandlerTy = unsafe extern "C" fn(&DiagnosticInfo, *mut c_void);
-pub type InlineAsmDiagHandlerTy = unsafe extern "C" fn(&SMDiagnostic, *const c_void, c_uint);
 
 pub mod debuginfo {
+    use std::ptr;
+
     use bitflags::bitflags;
 
     use super::{InvariantOpaque, Metadata};
+    use crate::llvm::{self, Module};
 
+    /// Opaque target type for references to an LLVM debuginfo builder.
+    ///
+    /// `&'_ DIBuilder<'ll>` corresponds to `LLVMDIBuilderRef`, which is the
+    /// LLVM-C wrapper for `DIBuilder *`.
+    ///
+    /// Debuginfo builders are created and destroyed during codegen, so the
+    /// builder reference typically has a shorter lifetime than the LLVM
+    /// session (`'ll`) that it participates in.
     #[repr(C)]
-    pub struct DIBuilder<'a>(InvariantOpaque<'a>);
+    pub struct DIBuilder<'ll>(InvariantOpaque<'ll>);
+
+    /// Owning pointer to a `DIBuilder<'ll>` that will dispose of the builder
+    /// when dropped. Use `.as_ref()` to get the underlying `&DIBuilder`
+    /// needed for debuginfo FFI calls.
+    pub(crate) struct DIBuilderBox<'ll> {
+        raw: ptr::NonNull<DIBuilder<'ll>>,
+    }
+
+    impl<'ll> DIBuilderBox<'ll> {
+        pub(crate) fn new(llmod: &'ll Module) -> Self {
+            let raw = unsafe { llvm::LLVMCreateDIBuilder(llmod) };
+            let raw = ptr::NonNull::new(raw).unwrap();
+            Self { raw }
+        }
+
+        pub(crate) fn as_ref(&self) -> &DIBuilder<'ll> {
+            // SAFETY: This is an owning pointer, so `&DIBuilder` is valid
+            // for as long as `&self` is.
+            unsafe { self.raw.as_ref() }
+        }
+    }
+
+    impl<'ll> Drop for DIBuilderBox<'ll> {
+        fn drop(&mut self) {
+            unsafe { llvm::LLVMDisposeDIBuilder(self.raw) };
+        }
+    }
 
     pub type DIDescriptor = Metadata;
     pub type DILocation = Metadata;
@@ -728,7 +828,6 @@ pub mod debuginfo {
     pub type DIFile = DIScope;
     pub type DILexicalBlock = DIScope;
     pub type DISubprogram = DIScope;
-    pub type DINameSpace = DIScope;
     pub type DIType = DIDescriptor;
     pub type DIBasicType = DIType;
     pub type DIDerivedType = DIType;
@@ -740,8 +839,11 @@ pub mod debuginfo {
     pub type DIEnumerator = DIDescriptor;
     pub type DITemplateTypeParameter = DIDescriptor;
 
-    // These values **must** match with LLVMRustDIFlags!!
     bitflags! {
+        /// Must match the layout of `LLVMDIFlags` in the LLVM-C API.
+        ///
+        /// Each value declared here must also be covered by the static
+        /// assertions in `RustWrapper.cpp` used by `fromRust(LLVMDIFlags)`.
         #[repr(transparent)]
         #[derive(Clone, Copy, Default)]
         pub struct DIFlags: u32 {
@@ -751,7 +853,7 @@ pub mod debuginfo {
             const FlagPublic              = 3;
             const FlagFwdDecl             = (1 << 2);
             const FlagAppleBlock          = (1 << 3);
-            const FlagBlockByrefStruct    = (1 << 4);
+            const FlagReservedBit4        = (1 << 4);
             const FlagVirtual             = (1 << 5);
             const FlagArtificial          = (1 << 6);
             const FlagExplicit            = (1 << 7);
@@ -762,10 +864,21 @@ pub mod debuginfo {
             const FlagStaticMember        = (1 << 12);
             const FlagLValueReference     = (1 << 13);
             const FlagRValueReference     = (1 << 14);
-            const FlagExternalTypeRef     = (1 << 15);
+            const FlagReserved            = (1 << 15);
+            const FlagSingleInheritance   = (1 << 16);
+            const FlagMultipleInheritance = (2 << 16);
+            const FlagVirtualInheritance  = (3 << 16);
             const FlagIntroducedVirtual   = (1 << 18);
             const FlagBitField            = (1 << 19);
             const FlagNoReturn            = (1 << 20);
+            // The bit at (1 << 21) is unused, but was `LLVMDIFlagMainSubprogram`.
+            const FlagTypePassByValue     = (1 << 22);
+            const FlagTypePassByReference = (1 << 23);
+            const FlagEnumClass           = (1 << 24);
+            const FlagThunk               = (1 << 25);
+            const FlagNonTrivial          = (1 << 26);
+            const FlagBigEndian           = (1 << 27);
+            const FlagLittleEndian        = (1 << 28);
         }
     }
 
@@ -826,7 +939,6 @@ pub mod debuginfo {
     }
 }
 
-use bitflags::bitflags;
 // These values **must** match with LLVMRustAllocKindFlags
 bitflags! {
     #[repr(transparent)]
@@ -917,6 +1029,7 @@ unsafe extern "C" {
     pub fn LLVMMetadataTypeInContext(C: &Context) -> &Type;
 
     // Operations on all values
+    pub fn LLVMIsUndef(Val: &Value) -> Bool;
     pub fn LLVMTypeOf(Val: &Value) -> &Type;
     pub fn LLVMGetValueName2(Val: &Value, Length: *mut size_t) -> *const c_char;
     pub fn LLVMSetValueName2(Val: &Value, Name: *const c_char, NameLen: size_t);
@@ -975,7 +1088,10 @@ unsafe extern "C" {
     pub fn LLVMConstPtrToInt<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
     pub fn LLVMConstIntToPtr<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
     pub fn LLVMConstBitCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
+    pub fn LLVMConstPointerCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
     pub fn LLVMGetAggregateElement(ConstantVal: &Value, Idx: c_uint) -> Option<&Value>;
+    pub fn LLVMGetConstOpcode(ConstantVal: &Value) -> Opcode;
+    pub fn LLVMIsAConstantExpr(Val: &Value) -> Option<&Value>;
 
     // Operations on global variables, functions, and aliases (globals)
     pub fn LLVMIsDeclaration(Global: &Value) -> Bool;
@@ -1032,6 +1148,7 @@ unsafe extern "C" {
     // Operations on instructions
     pub fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>;
     pub fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock;
+    pub fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>;
 
     // Operations on call sites
     pub fn LLVMSetInstructionCallConv(Instr: &Value, CC: c_uint);
@@ -1287,6 +1404,9 @@ unsafe extern "C" {
     pub fn LLVMBuildFNeg<'a>(B: &Builder<'a>, V: &'a Value, Name: *const c_char) -> &'a Value;
     pub fn LLVMBuildNot<'a>(B: &Builder<'a>, V: &'a Value, Name: *const c_char) -> &'a Value;
 
+    // Extra flags on arithmetic
+    pub fn LLVMSetIsDisjoint(Instr: &Value, IsDisjoint: Bool);
+
     // Memory
     pub fn LLVMBuildAlloca<'a>(B: &Builder<'a>, Ty: &'a Type, Name: *const c_char) -> &'a Value;
     pub fn LLVMBuildArrayAlloca<'a>(
@@ -1579,6 +1699,50 @@ unsafe extern "C" {
     ) -> &'a Value;
 }
 
+// FFI bindings for `DIBuilder` functions in the LLVM-C API.
+// Try to keep these in the same order as in `llvm/include/llvm-c/DebugInfo.h`.
+//
+// FIXME(#134001): Audit all `Option` parameters, especially in lists, to check
+// that they really are nullable on the C/C++ side. LLVM doesn't appear to
+// actually document which ones are nullable.
+unsafe extern "C" {
+    pub(crate) fn LLVMCreateDIBuilder<'ll>(M: &'ll Module) -> *mut DIBuilder<'ll>;
+    pub(crate) fn LLVMDisposeDIBuilder<'ll>(Builder: ptr::NonNull<DIBuilder<'ll>>);
+
+    pub(crate) fn LLVMDIBuilderFinalize<'ll>(Builder: &DIBuilder<'ll>);
+
+    pub(crate) fn LLVMDIBuilderCreateNameSpace<'ll>(
+        Builder: &DIBuilder<'ll>,
+        ParentScope: Option<&'ll Metadata>,
+        Name: *const c_uchar,
+        NameLen: size_t,
+        ExportSymbols: llvm::Bool,
+    ) -> &'ll Metadata;
+
+    pub(crate) fn LLVMDIBuilderCreateLexicalBlock<'ll>(
+        Builder: &DIBuilder<'ll>,
+        Scope: &'ll Metadata,
+        File: &'ll Metadata,
+        Line: c_uint,
+        Column: c_uint,
+    ) -> &'ll Metadata;
+
+    pub(crate) fn LLVMDIBuilderCreateLexicalBlockFile<'ll>(
+        Builder: &DIBuilder<'ll>,
+        Scope: &'ll Metadata,
+        File: &'ll Metadata,
+        Discriminator: c_uint, // (optional "DWARF path discriminator"; default is 0)
+    ) -> &'ll Metadata;
+
+    pub(crate) fn LLVMDIBuilderCreateDebugLocation<'ll>(
+        Ctx: &'ll Context,
+        Line: c_uint,
+        Column: c_uint,
+        Scope: &'ll Metadata,
+        InlinedAt: Option<&'ll Metadata>,
+    ) -> &'ll Metadata;
+}
+
 #[link(name = "llvm-wrapper", kind = "static")]
 unsafe extern "C" {
     pub fn LLVMRustInstallErrorHandlers();
@@ -1619,7 +1783,6 @@ unsafe extern "C" {
         Name: *const c_char,
         NameLen: size_t,
     ) -> Option<&Value>;
-    pub fn LLVMRustSetTailCallKind(CallInst: &Value, TKC: TailCallKind);
 
     // Operations on attributes
     pub fn LLVMRustCreateAttrNoValue(C: &Context, attr: AttributeKind) -> &Attribute;
@@ -1846,12 +2009,6 @@ unsafe extern "C" {
         ValueLen: size_t,
     );
 
-    pub fn LLVMRustDIBuilderCreate(M: &Module) -> &mut DIBuilder<'_>;
-
-    pub fn LLVMRustDIBuilderDispose<'a>(Builder: &'a mut DIBuilder<'a>);
-
-    pub fn LLVMRustDIBuilderFinalize(Builder: &DIBuilder<'_>);
-
     pub fn LLVMRustDIBuilderCreateCompileUnit<'a>(
         Builder: &DIBuilder<'a>,
         Lang: c_uint,
@@ -2008,19 +2165,11 @@ unsafe extern "C" {
         AlignInBits: u32,
     ) -> &'a DIDerivedType;
 
-    pub fn LLVMRustDIBuilderCreateLexicalBlock<'a>(
-        Builder: &DIBuilder<'a>,
-        Scope: &'a DIScope,
-        File: &'a DIFile,
-        Line: c_uint,
-        Col: c_uint,
-    ) -> &'a DILexicalBlock;
-
-    pub fn LLVMRustDIBuilderCreateLexicalBlockFile<'a>(
+    pub fn LLVMRustDIBuilderCreateQualifiedType<'a>(
         Builder: &DIBuilder<'a>,
-        Scope: &'a DIScope,
-        File: &'a DIFile,
-    ) -> &'a DILexicalBlock;
+        Tag: c_uint,
+        Type: &'a DIType,
+    ) -> &'a DIDerivedType;
 
     pub fn LLVMRustDIBuilderCreateStaticVariable<'a>(
         Builder: &DIBuilder<'a>,
@@ -2146,14 +2295,6 @@ unsafe extern "C" {
         Ty: &'a DIType,
     ) -> &'a DITemplateTypeParameter;
 
-    pub fn LLVMRustDIBuilderCreateNameSpace<'a>(
-        Builder: &DIBuilder<'a>,
-        Scope: Option<&'a DIScope>,
-        Name: *const c_char,
-        NameLen: size_t,
-        ExportSymbols: bool,
-    ) -> &'a DINameSpace;
-
     pub fn LLVMRustDICompositeTypeReplaceArrays<'a>(
         Builder: &DIBuilder<'a>,
         CompositeType: &'a DIType,
@@ -2161,19 +2302,10 @@ unsafe extern "C" {
         Params: Option<&'a DIArray>,
     );
 
-    pub fn LLVMRustDIBuilderCreateDebugLocation<'a>(
-        Line: c_uint,
-        Column: c_uint,
-        Scope: &'a DIScope,
-        InlinedAt: Option<&'a DILocation>,
-    ) -> &'a DILocation;
     pub fn LLVMRustDILocationCloneWithBaseDiscriminator<'a>(
         Location: &'a DILocation,
         BD: c_uint,
     ) -> Option<&'a DILocation>;
-    pub fn LLVMRustDIBuilderCreateOpDeref() -> u64;
-    pub fn LLVMRustDIBuilderCreateOpPlusUconst() -> u64;
-    pub fn LLVMRustDIBuilderCreateOpLLVMFragment() -> u64;
 
     pub fn LLVMRustWriteTypeToString(Type: &Type, s: &RustString);
     pub fn LLVMRustWriteValueToString(value_ref: &Value, s: &RustString);
@@ -2227,7 +2359,7 @@ unsafe extern "C" {
     );
     pub fn LLVMRustWriteOutputFile<'a>(
         T: &'a TargetMachine,
-        PM: &PassManager<'a>,
+        PM: *mut PassManager<'a>,
         M: &'a Module,
         Output: *const c_char,
         DwoOutput: *const c_char,
@@ -2250,6 +2382,7 @@ unsafe extern "C" {
         LoopVectorize: bool,
         DisableSimplifyLibCalls: bool,
         EmitLifetimeMarkers: bool,
+        RunEnzyme: bool,
         SanitizerOptions: Option<&SanitizerOptions>,
         PGOGenPath: *const c_char,
         PGOUsePath: *const c_char,
@@ -2371,7 +2504,7 @@ unsafe extern "C" {
         Data: &ThinLTOData,
         Module: &Module,
         Target: &TargetMachine,
-    ) -> bool;
+    );
     pub fn LLVMRustPrepareThinLTOResolveWeak(Data: &ThinLTOData, Module: &Module) -> bool;
     pub fn LLVMRustPrepareThinLTOInternalize(Data: &ThinLTOData, Module: &Module) -> bool;
     pub fn LLVMRustPrepareThinLTOImport(
@@ -2427,8 +2560,6 @@ unsafe extern "C" {
 
     pub fn LLVMRustGetElementTypeArgIndex(CallSite: &Value) -> i32;
 
-    pub fn LLVMRustIsBitcode(ptr: *const u8, len: usize) -> bool;
-
     pub fn LLVMRustLLVMHasZlibCompressionForDebugSymbols() -> bool;
 
     pub fn LLVMRustLLVMHasZstdCompressionForDebugSymbols() -> bool;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 2592a7df95c..7becba4ccd4 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -10,13 +10,9 @@ use libc::c_uint;
 use rustc_abi::{Align, Size, WrappingRange};
 use rustc_llvm::RustString;
 
-pub use self::AtomicRmwBinOp::*;
 pub use self::CallConv::*;
 pub use self::CodeGenOptSize::*;
-pub use self::IntPredicate::*;
-pub use self::Linkage::*;
 pub use self::MetadataType::*;
-pub use self::RealPredicate::*;
 pub use self::ffi::*;
 use crate::common::AsCCharPtr;
 
@@ -245,6 +241,10 @@ pub fn set_linkage(llglobal: &Value, linkage: Linkage) {
     }
 }
 
+pub fn is_declaration(llglobal: &Value) -> bool {
+    unsafe { LLVMIsDeclaration(llglobal) == ffi::True }
+}
+
 pub fn get_visibility(llglobal: &Value) -> Visibility {
     unsafe { LLVMGetVisibility(llglobal) }.to_rust()
 }
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 628c0b1c29c..b4b5d6a5b19 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -21,8 +21,8 @@ use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATU
 
 use crate::back::write::create_informational_target_machine;
 use crate::errors::{
-    FixedX18InvalidArch, ForbiddenCTargetFeature, InvalidTargetFeaturePrefix, PossibleFeature,
-    UnknownCTargetFeature, UnknownCTargetFeaturePrefix, UnstableCTargetFeature,
+    FixedX18InvalidArch, ForbiddenCTargetFeature, PossibleFeature, UnknownCTargetFeature,
+    UnknownCTargetFeaturePrefix, UnstableCTargetFeature,
 };
 use crate::llvm;
 
@@ -109,7 +109,10 @@ unsafe fn configure_llvm(sess: &Session) {
             add("-wasm-enable-eh", false);
         }
 
-        if sess.target.os == "emscripten" && sess.panic_strategy() == PanicStrategy::Unwind {
+        if sess.target.os == "emscripten"
+            && !sess.opts.unstable_opts.emscripten_wasm_eh
+            && sess.panic_strategy() == PanicStrategy::Unwind
+        {
             add("-enable-emscripten-cxx-exceptions", false);
         }
 
@@ -268,6 +271,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("aarch64", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // Filter out features that are not supported by the current LLVM version
         ("aarch64", "fpmr") if get_version().0 != 18 => None,
+        ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // In LLVM 18, `unaligned-scalar-mem` was merged with `unaligned-vector-mem` into a single
         // feature called `fast-unaligned-access`. In LLVM 19, it was split back out.
         ("riscv32" | "riscv64", "unaligned-scalar-mem") if get_version().0 == 18 => {
@@ -300,7 +304,7 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
 /// Must express features in the way Rust understands them.
 ///
 /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
-pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
+pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
     let mut features: FxHashSet<Symbol> = Default::default();
 
     // Add base features for the target.
@@ -316,7 +320,6 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
         sess.target
             .rust_target_features()
             .iter()
-            .filter(|(_, gate, _)| gate.in_cfg())
             .filter(|(feature, _, _)| {
                 // skip checking special features, as LLVM may not understand them
                 if RUSTC_SPECIAL_FEATURES.contains(feature) {
@@ -348,7 +351,16 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
     {
         if enabled {
             // Also add all transitively implied features.
-            features.extend(sess.target.implied_target_features(std::iter::once(feature)));
+
+            // We don't care about the order in `features` since the only thing we use it for is the
+            // `features.contains` below.
+            #[allow(rustc::potential_query_instability)]
+            features.extend(
+                sess.target
+                    .implied_target_features(std::iter::once(feature.as_str()))
+                    .iter()
+                    .map(|s| Symbol::intern(s)),
+            );
         } else {
             // Remove transitively reverse-implied features.
 
@@ -356,7 +368,11 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
             // `features.contains` below.
             #[allow(rustc::potential_query_instability)]
             features.retain(|f| {
-                if sess.target.implied_target_features(std::iter::once(*f)).contains(&feature) {
+                if sess
+                    .target
+                    .implied_target_features(std::iter::once(f.as_str()))
+                    .contains(&feature.as_str())
+                {
                     // If `f` if implies `feature`, then `!feature` implies `!f`, so we have to
                     // remove `f`. (This is the standard logical contraposition principle.)
                     false
@@ -372,9 +388,13 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
     sess.target
         .rust_target_features()
         .iter()
-        .filter(|(_, gate, _)| gate.in_cfg())
         .filter_map(|(feature, gate, _)| {
-            if sess.is_nightly_build() || allow_unstable || gate.requires_nightly().is_none() {
+            // The `allow_unstable` set is used by rustc internally to determined which target
+            // features are truly available, so we want to return even perma-unstable "forbidden"
+            // features.
+            if allow_unstable
+                || (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
+            {
                 Some(*feature)
             } else {
                 None
@@ -638,7 +658,7 @@ pub(crate) fn global_llvm_features(
         sess.target
             .features
             .split(',')
-            .filter(|v| !v.is_empty() && backend_feature_name(sess, v).is_some())
+            .filter(|v| !v.is_empty())
             // Drop +v8plus feature introduced in LLVM 20.
             .filter(|v| *v != "+v8plus" || get_version() >= (20, 0, 0))
             .map(String::from),
@@ -651,89 +671,89 @@ pub(crate) fn global_llvm_features(
     // -Ctarget-features
     if !only_base_features {
         let known_features = sess.target.rust_target_features();
+        // Will only be filled when `diagnostics` is set!
         let mut featsmap = FxHashMap::default();
 
-        // insert implied features
+        // Compute implied features
         let mut all_rust_features = vec![];
         for feature in sess.opts.cg.target_feature.split(',') {
-            match feature.strip_prefix('+') {
-                Some(feature) => all_rust_features.extend(
-                    UnordSet::from(
-                        sess.target
-                            .implied_target_features(std::iter::once(Symbol::intern(feature))),
-                    )
-                    .to_sorted_stable_ord()
-                    .iter()
-                    .map(|s| format!("+{}", s.as_str())),
-                ),
-                _ => all_rust_features.push(feature.to_string()),
+            if let Some(feature) = feature.strip_prefix('+') {
+                all_rust_features.extend(
+                    UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
+                        .to_sorted_stable_ord()
+                        .iter()
+                        .map(|&&s| (true, s)),
+                )
+            } else if let Some(feature) = feature.strip_prefix('-') {
+                // FIXME: Why do we not remove implied features on "-" here?
+                // We do the equivalent above in `target_features_cfg`.
+                // See <https://github.com/rust-lang/rust/issues/134792>.
+                all_rust_features.push((false, feature));
+            } else if !feature.is_empty() {
+                if diagnostics {
+                    sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature });
+                }
             }
         }
+        // Remove features that are meant for rustc, not LLVM.
+        all_rust_features.retain(|(_, feature)| {
+            // Retain if it is not a rustc feature
+            !RUSTC_SPECIFIC_FEATURES.contains(feature)
+        });
 
-        let feats = all_rust_features
-            .iter()
-            .filter_map(|s| {
-                let enable_disable = match s.chars().next() {
-                    None => return None,
-                    Some(c @ ('+' | '-')) => c,
-                    Some(_) => {
-                        if diagnostics {
-                            sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature: s });
-                        }
-                        return None;
-                    }
-                };
-
-                // Get the backend feature name, if any.
-                // This excludes rustc-specific features, which do not get passed to LLVM.
-                let feature = backend_feature_name(sess, s)?;
-                // Warn against use of LLVM specific feature names and unstable features on the CLI.
-                if diagnostics {
-                    let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature);
-                    match feature_state {
-                        None => {
-                            let rust_feature =
-                                known_features.iter().find_map(|&(rust_feature, _, _)| {
-                                    let llvm_features = to_llvm_features(sess, rust_feature)?;
-                                    if llvm_features.contains(feature)
-                                        && !llvm_features.contains(rust_feature)
-                                    {
-                                        Some(rust_feature)
-                                    } else {
-                                        None
-                                    }
-                                });
-                            let unknown_feature = if let Some(rust_feature) = rust_feature {
-                                UnknownCTargetFeature {
-                                    feature,
-                                    rust_feature: PossibleFeature::Some { rust_feature },
+        // Check feature validity.
+        if diagnostics {
+            for &(enable, feature) in &all_rust_features {
+                let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature);
+                match feature_state {
+                    None => {
+                        let rust_feature =
+                            known_features.iter().find_map(|&(rust_feature, _, _)| {
+                                let llvm_features = to_llvm_features(sess, rust_feature)?;
+                                if llvm_features.contains(feature)
+                                    && !llvm_features.contains(rust_feature)
+                                {
+                                    Some(rust_feature)
+                                } else {
+                                    None
                                 }
-                            } else {
-                                UnknownCTargetFeature {
-                                    feature,
-                                    rust_feature: PossibleFeature::None,
-                                }
-                            };
-                            sess.dcx().emit_warn(unknown_feature);
-                        }
-                        Some((_, stability, _)) => {
-                            if let Err(reason) =
-                                stability.toggle_allowed(&sess.target, enable_disable == '+')
-                            {
-                                sess.dcx().emit_warn(ForbiddenCTargetFeature { feature, reason });
-                            } else if stability.requires_nightly().is_some() {
-                                // An unstable feature. Warn about using it. It makes little sense
-                                // to hard-error here since we just warn about fully unknown
-                                // features above.
-                                sess.dcx().emit_warn(UnstableCTargetFeature { feature });
+                            });
+                        let unknown_feature = if let Some(rust_feature) = rust_feature {
+                            UnknownCTargetFeature {
+                                feature,
+                                rust_feature: PossibleFeature::Some { rust_feature },
                             }
+                        } else {
+                            UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None }
+                        };
+                        sess.dcx().emit_warn(unknown_feature);
+                    }
+                    Some((_, stability, _)) => {
+                        if let Err(reason) = stability.toggle_allowed() {
+                            sess.dcx().emit_warn(ForbiddenCTargetFeature {
+                                feature,
+                                enabled: if enable { "enabled" } else { "disabled" },
+                                reason,
+                            });
+                        } else if stability.requires_nightly().is_some() {
+                            // An unstable feature. Warn about using it. It makes little sense
+                            // to hard-error here since we just warn about fully unknown
+                            // features above.
+                            sess.dcx().emit_warn(UnstableCTargetFeature { feature });
                         }
                     }
-
-                    // FIXME(nagisa): figure out how to not allocate a full hashset here.
-                    featsmap.insert(feature, enable_disable == '+');
                 }
 
+                // FIXME(nagisa): figure out how to not allocate a full hashset here.
+                featsmap.insert(feature, enable);
+            }
+        }
+
+        // Translate this into LLVM features.
+        let feats = all_rust_features
+            .iter()
+            .filter_map(|&(enable, feature)| {
+                let enable_disable = if enable { '+' } else { '-' };
                 // We run through `to_llvm_features` when
                 // passing requests down to LLVM. This means that all in-language
                 // features also work on the command line instead of having two
@@ -746,9 +766,9 @@ pub(crate) fn global_llvm_features(
                         enable_disable, llvm_feature.llvm_feature_name
                     ))
                     .chain(llvm_feature.dependency.into_iter().filter_map(
-                        move |feat| match (enable_disable, feat) {
-                            ('-' | '+', TargetFeatureFoldStrength::Both(f))
-                            | ('+', TargetFeatureFoldStrength::EnableOnly(f)) => {
+                        move |feat| match (enable, feat) {
+                            (_, TargetFeatureFoldStrength::Both(f))
+                            | (true, TargetFeatureFoldStrength::EnableOnly(f)) => {
                                 Some(format!("{enable_disable}{f}"))
                             }
                             _ => None,
@@ -780,22 +800,6 @@ pub(crate) fn global_llvm_features(
     features
 }
 
-/// Returns a feature name for the given `+feature` or `-feature` string.
-///
-/// Only allows features that are backend specific (i.e. not [`RUSTC_SPECIFIC_FEATURES`].)
-fn backend_feature_name<'a>(sess: &Session, s: &'a str) -> Option<&'a str> {
-    // features must start with a `+` or `-`.
-    let feature = s
-        .strip_prefix(&['+', '-'][..])
-        .unwrap_or_else(|| sess.dcx().emit_fatal(InvalidTargetFeaturePrefix { feature: s }));
-    // Rustc-specific feature requests like `+crt-static` or `-crt-static`
-    // are not passed down to LLVM.
-    if s.is_empty() || RUSTC_SPECIFIC_FEATURES.contains(&feature) {
-        return None;
-    }
-    Some(feature)
-}
-
 pub(crate) fn tune_cpu(sess: &Session) -> Option<&str> {
     let name = sess.opts.unstable_opts.tune_cpu.as_ref()?;
     Some(handle_native(name))
diff --git a/compiler/rustc_codegen_llvm/src/mono_item.rs b/compiler/rustc_codegen_llvm/src/mono_item.rs
index 33789c6261f..70edee21bd6 100644
--- a/compiler/rustc_codegen_llvm/src/mono_item.rs
+++ b/compiler/rustc_codegen_llvm/src/mono_item.rs
@@ -71,10 +71,7 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
         // compiler-rt, then we want to implicitly compile everything with hidden
         // visibility as we're going to link this object all over the place but
         // don't want the symbols to get exported.
-        if linkage != Linkage::Internal
-            && linkage != Linkage::Private
-            && self.tcx.is_compiler_builtins(LOCAL_CRATE)
-        {
+        if linkage != Linkage::Internal && self.tcx.is_compiler_builtins(LOCAL_CRATE) {
             llvm::set_visibility(lldecl, llvm::Visibility::Hidden);
         } else {
             llvm::set_visibility(lldecl, base::visibility_to_llvm(visibility));
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 6aec078e0de..d61ce417562 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -1,17 +1,17 @@
 use std::{fmt, ptr};
 
 use libc::{c_char, c_uint};
-use rustc_abi::{AddressSpace, Align, Integer, Size};
+use rustc_abi::{AddressSpace, Align, Integer, Reg, Size};
 use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_middle::bug;
 use rustc_middle::ty::layout::TyAndLayout;
 use rustc_middle::ty::{self, Ty};
-use rustc_target::callconv::{CastTarget, FnAbi, Reg};
+use rustc_target::callconv::{CastTarget, FnAbi};
 
 use crate::abi::{FnAbiLlvmExt, LlvmType};
-use crate::context::CodegenCx;
+use crate::context::{CodegenCx, SimpleCx};
 pub(crate) use crate::llvm::Type;
 use crate::llvm::{Bool, False, Metadata, True};
 use crate::type_of::LayoutLlvmExt;
@@ -35,7 +35,8 @@ impl fmt::Debug for Type {
     }
 }
 
-impl<'ll> CodegenCx<'ll, '_> {
+impl<'ll> CodegenCx<'ll, '_> {}
+impl<'ll> SimpleCx<'ll> {
     pub(crate) fn type_named_struct(&self, name: &str) -> &'ll Type {
         let name = SmallCStr::new(name);
         unsafe { llvm::LLVMStructCreateNamed(self.llcx, name.as_ptr()) }
@@ -44,11 +45,9 @@ impl<'ll> CodegenCx<'ll, '_> {
     pub(crate) fn set_struct_body(&self, ty: &'ll Type, els: &[&'ll Type], packed: bool) {
         unsafe { llvm::LLVMStructSetBody(ty, els.as_ptr(), els.len() as c_uint, packed as Bool) }
     }
-
     pub(crate) fn type_void(&self) -> &'ll Type {
         unsafe { llvm::LLVMVoidTypeInContext(self.llcx) }
     }
-
     pub(crate) fn type_token(&self) -> &'ll Type {
         unsafe { llvm::LLVMTokenTypeInContext(self.llcx) }
     }
@@ -75,7 +74,8 @@ impl<'ll> CodegenCx<'ll, '_> {
             args
         }
     }
-
+}
+impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     pub(crate) fn type_bool(&self) -> &'ll Type {
         self.type_i8()
     }
@@ -120,7 +120,8 @@ impl<'ll> CodegenCx<'ll, '_> {
         assert_eq!(size % unit_size, 0);
         self.type_array(self.type_from_integer(unit), size / unit_size)
     }
-
+}
+impl<'ll> SimpleCx<'ll> {
     pub(crate) fn type_variadic_func(&self, args: &[&'ll Type], ret: &'ll Type) -> &'ll Type {
         unsafe { llvm::LLVMFunctionType(ret, args.as_ptr(), args.len() as c_uint, True) }
     }
@@ -236,11 +237,11 @@ impl<'ll, 'tcx> BaseTypeCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
 
 impl Type {
     /// Creates an integer type with the given number of bits, e.g., i24
-    pub fn ix_llcx(llcx: &llvm::Context, num_bits: u64) -> &Type {
+    pub(crate) fn ix_llcx(llcx: &llvm::Context, num_bits: u64) -> &Type {
         unsafe { llvm::LLVMIntTypeInContext(llcx, num_bits as c_uint) }
     }
 
-    pub fn ptr_llcx(llcx: &llvm::Context) -> &Type {
+    pub(crate) fn ptr_llcx(llcx: &llvm::Context) -> &Type {
         unsafe { llvm::LLVMPointerTypeInContext(llcx, AddressSpace::DATA.0) }
     }
 }