Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--  compiler/rustc_codegen_llvm/src/abi.rs | 67
-rw-r--r--  compiler/rustc_codegen_llvm/src/allocator.rs | 173
-rw-r--r--  compiler/rustc_codegen_llvm/src/asm.rs | 141
-rw-r--r--  compiler/rustc_codegen_llvm/src/back/archive.rs | 36
-rw-r--r--  compiler/rustc_codegen_llvm/src/back/lto.rs | 113
-rw-r--r--  compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs | 13
-rw-r--r--  compiler/rustc_codegen_llvm/src/back/write.rs | 374
-rw-r--r--  compiler/rustc_codegen_llvm/src/base.rs | 10
-rw-r--r--  compiler/rustc_codegen_llvm/src/builder.rs | 323
-rw-r--r--  compiler/rustc_codegen_llvm/src/builder/autodiff.rs | 432
-rw-r--r--  compiler/rustc_codegen_llvm/src/callee.rs | 106
-rw-r--r--  compiler/rustc_codegen_llvm/src/common.rs | 58
-rw-r--r--  compiler/rustc_codegen_llvm/src/consts.rs | 75
-rw-r--r--  compiler/rustc_codegen_llvm/src/context.rs | 191
-rw-r--r--  compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs | 19
-rw-r--r--  compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs | 16
-rw-r--r--  compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs | 102
-rw-r--r--  compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs | 75
-rw-r--r--  compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs | 23
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs | 4
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs | 267
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs | 53
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs | 7
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs | 47
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs | 5
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/mod.rs | 102
-rw-r--r--  compiler/rustc_codegen_llvm/src/declare.rs | 22
-rw-r--r--  compiler/rustc_codegen_llvm/src/errors.rs | 10
-rw-r--r--  compiler/rustc_codegen_llvm/src/intrinsic.rs | 525
-rw-r--r--  compiler/rustc_codegen_llvm/src/lib.rs | 30
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs | 12
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs | 20
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs | 132
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 1026
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/mod.rs | 147
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm_util.rs | 132
-rw-r--r--  compiler/rustc_codegen_llvm/src/mono_item.rs | 32
-rw-r--r--  compiler/rustc_codegen_llvm/src/type_.rs | 62
-rw-r--r--  compiler/rustc_codegen_llvm/src/type_of.rs | 15
-rw-r--r--  compiler/rustc_codegen_llvm/src/va_arg.rs | 2
40 files changed, 2783 insertions(+), 2216 deletions(-)
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 31ee0eeca11..71059338151 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -2,20 +2,18 @@ use std::borrow::Borrow;
 use std::cmp;
 
 use libc::c_uint;
-use rustc_abi as abi;
-pub(crate) use rustc_abi::ExternAbi;
-use rustc_abi::Primitive::Int;
-use rustc_abi::{HasDataLayout, Size};
+use rustc_abi::{BackendRepr, HasDataLayout, Primitive, Reg, RegKind, Size};
 use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue};
 use rustc_codegen_ssa::traits::*;
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::LayoutOf;
-pub(crate) use rustc_middle::ty::layout::{WIDE_PTR_ADDR, WIDE_PTR_EXTRA};
 use rustc_middle::{bug, ty};
 use rustc_session::config;
-pub(crate) use rustc_target::callconv::*;
+use rustc_target::callconv::{
+    ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, CastTarget, Conv, FnAbi, PassMode,
+};
 use rustc_target::spec::SanitizerSet;
 use smallvec::SmallVec;
 
@@ -440,7 +438,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         let apply_range_attr = |idx: AttributePlace, scalar: rustc_abi::Scalar| {
             if cx.sess().opts.optimize != config::OptLevel::No
                 && llvm_util::get_version() >= (19, 0, 0)
-                && matches!(scalar.primitive(), Int(..))
+                && matches!(scalar.primitive(), Primitive::Int(..))
                 // If the value is a boolean, the range is 0..2 and that ultimately
                 // become 0..0 when the type becomes i1, which would be rejected
                 // by the LLVM verifier.
@@ -448,18 +446,18 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 // LLVM also rejects full range.
                 && !scalar.is_always_valid(cx)
             {
-                attributes::apply_to_llfn(llfn, idx, &[llvm::CreateRangeAttr(
-                    cx.llcx,
-                    scalar.size(cx),
-                    scalar.valid_range(cx),
-                )]);
+                attributes::apply_to_llfn(
+                    llfn,
+                    idx,
+                    &[llvm::CreateRangeAttr(cx.llcx, scalar.size(cx), scalar.valid_range(cx))],
+                );
             }
         };
 
         match &self.ret.mode {
             PassMode::Direct(attrs) => {
                 attrs.apply_attrs_to_llfn(llvm::AttributePlace::ReturnValue, cx, llfn);
-                if let abi::BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr {
+                if let BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr {
                     apply_range_attr(llvm::AttributePlace::ReturnValue, scalar);
                 }
             }
@@ -472,10 +470,14 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 );
                 attributes::apply_to_llfn(llfn, llvm::AttributePlace::Argument(i), &[sret]);
                 if cx.sess().opts.optimize != config::OptLevel::No {
-                    attributes::apply_to_llfn(llfn, llvm::AttributePlace::Argument(i), &[
-                        llvm::AttributeKind::Writable.create_attr(cx.llcx),
-                        llvm::AttributeKind::DeadOnUnwind.create_attr(cx.llcx),
-                    ]);
+                    attributes::apply_to_llfn(
+                        llfn,
+                        llvm::AttributePlace::Argument(i),
+                        &[
+                            llvm::AttributeKind::Writable.create_attr(cx.llcx),
+                            llvm::AttributeKind::DeadOnUnwind.create_attr(cx.llcx),
+                        ],
+                    );
                 }
             }
             PassMode::Cast { cast, pad_i32: _ } => {
@@ -496,7 +498,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 }
                 PassMode::Direct(attrs) => {
                     let i = apply(attrs);
-                    if let abi::BackendRepr::Scalar(scalar) = arg.layout.backend_repr {
+                    if let BackendRepr::Scalar(scalar) = arg.layout.backend_repr {
                         apply_range_attr(llvm::AttributePlace::Argument(i), scalar);
                     }
                 }
@@ -511,9 +513,7 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                 PassMode::Pair(a, b) => {
                     let i = apply(a);
                     let ii = apply(b);
-                    if let abi::BackendRepr::ScalarPair(scalar_a, scalar_b) =
-                        arg.layout.backend_repr
-                    {
+                    if let BackendRepr::ScalarPair(scalar_a, scalar_b) = arg.layout.backend_repr {
                         apply_range_attr(llvm::AttributePlace::Argument(i), scalar_a);
                         apply_range_attr(llvm::AttributePlace::Argument(ii), scalar_b);
                     }
@@ -573,8 +573,8 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
         }
         if bx.cx.sess().opts.optimize != config::OptLevel::No
                 && llvm_util::get_version() < (19, 0, 0)
-                && let abi::BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr
-                && matches!(scalar.primitive(), Int(..))
+                && let BackendRepr::Scalar(scalar) = self.ret.layout.backend_repr
+                && matches!(scalar.primitive(), Primitive::Int(..))
                 // If the value is a boolean, the range is 0..2 and that ultimately
                 // become 0..0 when the type becomes i1, which would be rejected
                 // by the LLVM verifier.
@@ -593,9 +593,11 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
                         bx.cx.llcx,
                         bx.cx.type_array(bx.cx.type_i8(), arg.layout.size.bytes()),
                     );
-                    attributes::apply_to_callsite(callsite, llvm::AttributePlace::Argument(i), &[
-                        byval,
-                    ]);
+                    attributes::apply_to_callsite(
+                        callsite,
+                        llvm::AttributePlace::Argument(i),
+                        &[byval],
+                    );
                 }
                 PassMode::Direct(attrs)
                 | PassMode::Indirect { attrs, meta_attrs: None, on_stack: false } => {
@@ -627,9 +629,11 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
             // This will probably get ignored on all targets but those supporting the TrustZone-M
             // extension (thumbv8m targets).
             let cmse_nonsecure_call = llvm::CreateAttrString(bx.cx.llcx, "cmse_nonsecure_call");
-            attributes::apply_to_callsite(callsite, llvm::AttributePlace::Function, &[
-                cmse_nonsecure_call,
-            ]);
+            attributes::apply_to_callsite(
+                callsite,
+                llvm::AttributePlace::Function,
+                &[cmse_nonsecure_call],
+            );
         }
 
         // Some intrinsics require that an elementtype attribute (with the pointee type of a
@@ -650,14 +654,14 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> {
     }
 }
 
-impl<'tcx> AbiBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
+impl AbiBuilderMethods for Builder<'_, '_, '_> {
     fn get_param(&mut self, index: usize) -> Self::Value {
         llvm::get_param(self.llfn(), index as c_uint)
     }
 }
 
 impl llvm::CallConv {
-    pub fn from_conv(conv: Conv, arch: &str) -> Self {
+    pub(crate) fn from_conv(conv: Conv, arch: &str) -> Self {
         match conv {
             Conv::C
             | Conv::Rust
@@ -680,7 +684,6 @@ impl llvm::CallConv {
             Conv::AvrNonBlockingInterrupt => llvm::AvrNonBlockingInterrupt,
             Conv::ArmAapcs => llvm::ArmAapcsCallConv,
             Conv::Msp430Intr => llvm::Msp430Intr,
-            Conv::PtxKernel => llvm::PtxKernel,
             Conv::X86Fastcall => llvm::X86FastcallCallConv,
             Conv::X86Intr => llvm::X86_Intr,
             Conv::X86Stdcall => llvm::X86StdcallCallConv,
diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs
index 149ded28356..4a78e694979 100644
--- a/compiler/rustc_codegen_llvm/src/allocator.rs
+++ b/compiler/rustc_codegen_llvm/src/allocator.rs
@@ -3,33 +3,32 @@ use rustc_ast::expand::allocator::{
     ALLOCATOR_METHODS, AllocatorKind, AllocatorTy, NO_ALLOC_SHIM_IS_UNSTABLE,
     alloc_error_handler_name, default_fn_name, global_fn_name,
 };
+use rustc_codegen_ssa::traits::BaseTypeCodegenMethods as _;
 use rustc_middle::bug;
 use rustc_middle::ty::TyCtxt;
 use rustc_session::config::{DebugInfo, OomStrategy};
+use rustc_symbol_mangling::mangle_internal_symbol;
 
-use crate::common::AsCCharPtr;
-use crate::llvm::{self, Context, False, Module, True, Type};
-use crate::{ModuleLlvm, attributes, debuginfo};
+use crate::builder::SBuilder;
+use crate::declare::declare_simple_fn;
+use crate::llvm::{self, False, True, Type};
+use crate::{SimpleCx, attributes, debuginfo};
 
 pub(crate) unsafe fn codegen(
     tcx: TyCtxt<'_>,
-    module_llvm: &mut ModuleLlvm,
+    cx: SimpleCx<'_>,
     module_name: &str,
     kind: AllocatorKind,
     alloc_error_handler_kind: AllocatorKind,
 ) {
-    let llcx = &*module_llvm.llcx;
-    let llmod = module_llvm.llmod();
-    let usize = unsafe {
-        match tcx.sess.target.pointer_width {
-            16 => llvm::LLVMInt16TypeInContext(llcx),
-            32 => llvm::LLVMInt32TypeInContext(llcx),
-            64 => llvm::LLVMInt64TypeInContext(llcx),
-            tws => bug!("Unsupported target word size for int: {}", tws),
-        }
+    let usize = match tcx.sess.target.pointer_width {
+        16 => cx.type_i16(),
+        32 => cx.type_i32(),
+        64 => cx.type_i64(),
+        tws => bug!("Unsupported target word size for int: {}", tws),
     };
-    let i8 = unsafe { llvm::LLVMInt8TypeInContext(llcx) };
-    let i8p = unsafe { llvm::LLVMPointerTypeInContext(llcx, 0) };
+    let i8 = cx.type_i8();
+    let i8p = cx.type_ptr();
 
     if kind == AllocatorKind::Default {
         for method in ALLOCATOR_METHODS {
@@ -55,20 +54,19 @@ pub(crate) unsafe fn codegen(
                 }
             };
 
-            let from_name = global_fn_name(method.name);
-            let to_name = default_fn_name(method.name);
+            let from_name = mangle_internal_symbol(tcx, &global_fn_name(method.name));
+            let to_name = mangle_internal_symbol(tcx, &default_fn_name(method.name));
 
-            create_wrapper_function(tcx, llcx, llmod, &from_name, &to_name, &args, output, false);
+            create_wrapper_function(tcx, &cx, &from_name, &to_name, &args, output, false);
         }
     }
 
     // rust alloc error handler
     create_wrapper_function(
         tcx,
-        llcx,
-        llmod,
-        "__rust_alloc_error_handler",
-        alloc_error_handler_name(alloc_error_handler_kind),
+        &cx,
+        &mangle_internal_symbol(tcx, "__rust_alloc_error_handler"),
+        &mangle_internal_symbol(tcx, alloc_error_handler_name(alloc_error_handler_kind)),
         &[usize, usize], // size, align
         None,
         true,
@@ -76,22 +74,22 @@ pub(crate) unsafe fn codegen(
 
     unsafe {
         // __rust_alloc_error_handler_should_panic
-        let name = OomStrategy::SYMBOL;
-        let ll_g = llvm::LLVMRustGetOrInsertGlobal(llmod, name.as_c_char_ptr(), name.len(), i8);
+        let name = mangle_internal_symbol(tcx, OomStrategy::SYMBOL);
+        let ll_g = cx.declare_global(&name, i8);
         llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
         let val = tcx.sess.opts.unstable_opts.oom.should_panic();
         let llval = llvm::LLVMConstInt(i8, val as u64, False);
-        llvm::LLVMSetInitializer(ll_g, llval);
+        llvm::set_initializer(ll_g, llval);
 
-        let name = NO_ALLOC_SHIM_IS_UNSTABLE;
-        let ll_g = llvm::LLVMRustGetOrInsertGlobal(llmod, name.as_c_char_ptr(), name.len(), i8);
+        let name = mangle_internal_symbol(tcx, NO_ALLOC_SHIM_IS_UNSTABLE);
+        let ll_g = cx.declare_global(&name, i8);
         llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
         let llval = llvm::LLVMConstInt(i8, 0, False);
-        llvm::LLVMSetInitializer(ll_g, llval);
+        llvm::set_initializer(ll_g, llval);
     }
 
     if tcx.sess.opts.debuginfo != DebugInfo::None {
-        let dbg_cx = debuginfo::CodegenUnitDebugContext::new(llmod);
+        let dbg_cx = debuginfo::CodegenUnitDebugContext::new(cx.llmod);
         debuginfo::metadata::build_compile_unit_di_node(tcx, module_name, &dbg_cx);
         dbg_cx.finalize(tcx.sess);
     }
@@ -99,77 +97,64 @@ pub(crate) unsafe fn codegen(
 
 fn create_wrapper_function(
     tcx: TyCtxt<'_>,
-    llcx: &Context,
-    llmod: &Module,
+    cx: &SimpleCx<'_>,
     from_name: &str,
     to_name: &str,
     args: &[&Type],
     output: Option<&Type>,
     no_return: bool,
 ) {
-    unsafe {
-        let ty = llvm::LLVMFunctionType(
-            output.unwrap_or_else(|| llvm::LLVMVoidTypeInContext(llcx)),
-            args.as_ptr(),
-            args.len() as c_uint,
-            False,
-        );
-        let llfn = llvm::LLVMRustGetOrInsertFunction(
-            llmod,
-            from_name.as_c_char_ptr(),
-            from_name.len(),
-            ty,
-        );
-        let no_return = if no_return {
-            // -> ! DIFlagNoReturn
-            let no_return = llvm::AttributeKind::NoReturn.create_attr(llcx);
-            attributes::apply_to_llfn(llfn, llvm::AttributePlace::Function, &[no_return]);
-            Some(no_return)
-        } else {
-            None
-        };
-
-        llvm::set_visibility(llfn, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
-
-        if tcx.sess.must_emit_unwind_tables() {
-            let uwtable =
-                attributes::uwtable_attr(llcx, tcx.sess.opts.unstable_opts.use_sync_unwind);
-            attributes::apply_to_llfn(llfn, llvm::AttributePlace::Function, &[uwtable]);
-        }
+    let ty = cx.type_func(args, output.unwrap_or_else(|| cx.type_void()));
+    let llfn = declare_simple_fn(
+        &cx,
+        from_name,
+        llvm::CallConv::CCallConv,
+        llvm::UnnamedAddr::Global,
+        llvm::Visibility::from_generic(tcx.sess.default_visibility()),
+        ty,
+    );
+    let no_return = if no_return {
+        // -> ! DIFlagNoReturn
+        let no_return = llvm::AttributeKind::NoReturn.create_attr(cx.llcx);
+        attributes::apply_to_llfn(llfn, llvm::AttributePlace::Function, &[no_return]);
+        Some(no_return)
+    } else {
+        None
+    };
 
-        let callee =
-            llvm::LLVMRustGetOrInsertFunction(llmod, to_name.as_c_char_ptr(), to_name.len(), ty);
-        if let Some(no_return) = no_return {
-            // -> ! DIFlagNoReturn
-            attributes::apply_to_llfn(callee, llvm::AttributePlace::Function, &[no_return]);
-        }
-        llvm::set_visibility(callee, llvm::Visibility::Hidden);
-
-        let llbb = llvm::LLVMAppendBasicBlockInContext(llcx, llfn, c"entry".as_ptr());
-
-        let llbuilder = llvm::LLVMCreateBuilderInContext(llcx);
-        llvm::LLVMPositionBuilderAtEnd(llbuilder, llbb);
-        let args = args
-            .iter()
-            .enumerate()
-            .map(|(i, _)| llvm::LLVMGetParam(llfn, i as c_uint))
-            .collect::<Vec<_>>();
-        let ret = llvm::LLVMBuildCallWithOperandBundles(
-            llbuilder,
-            ty,
-            callee,
-            args.as_ptr(),
-            args.len() as c_uint,
-            [].as_ptr(),
-            0 as c_uint,
-            c"".as_ptr(),
-        );
-        llvm::LLVMSetTailCall(ret, True);
-        if output.is_some() {
-            llvm::LLVMBuildRet(llbuilder, ret);
-        } else {
-            llvm::LLVMBuildRetVoid(llbuilder);
-        }
-        llvm::LLVMDisposeBuilder(llbuilder);
+    if tcx.sess.must_emit_unwind_tables() {
+        let uwtable =
+            attributes::uwtable_attr(cx.llcx, tcx.sess.opts.unstable_opts.use_sync_unwind);
+        attributes::apply_to_llfn(llfn, llvm::AttributePlace::Function, &[uwtable]);
+    }
+
+    let callee = declare_simple_fn(
+        &cx,
+        to_name,
+        llvm::CallConv::CCallConv,
+        llvm::UnnamedAddr::Global,
+        llvm::Visibility::Hidden,
+        ty,
+    );
+    if let Some(no_return) = no_return {
+        // -> ! DIFlagNoReturn
+        attributes::apply_to_llfn(callee, llvm::AttributePlace::Function, &[no_return]);
+    }
+    llvm::set_visibility(callee, llvm::Visibility::Hidden);
+
+    let llbb = unsafe { llvm::LLVMAppendBasicBlockInContext(cx.llcx, llfn, c"entry".as_ptr()) };
+
+    let mut bx = SBuilder::build(&cx, llbb);
+    let args = args
+        .iter()
+        .enumerate()
+        .map(|(i, _)| llvm::get_param(llfn, i as c_uint))
+        .collect::<Vec<_>>();
+    let ret = bx.call(ty, callee, &args, None);
+    llvm::LLVMSetTailCall(ret, True);
+    if output.is_some() {
+        bx.ret(ret);
+    } else {
+        bx.ret_void()
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index 3722d4350a2..88daa025740 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -1,6 +1,5 @@
 use std::assert_matches::assert_matches;
 
-use libc::{c_char, c_uint};
 use rustc_abi::{BackendRepr, Float, Integer, Primitive, Scalar};
 use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
 use rustc_codegen_ssa::mir::operand::OperandValue;
@@ -286,7 +285,9 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 InlineAsmArch::M68k => {
                     constraints.push("~{ccr}".to_string());
                 }
-                InlineAsmArch::CSKY => {}
+                InlineAsmArch::CSKY => {
+                    constraints.push("~{psr}".to_string());
+                }
             }
         }
         if !options.contains(InlineAsmOptions::NOMEM) {
@@ -481,12 +482,13 @@ pub(crate) fn inline_asm_call<'ll>(
 
     debug!("Asm Output Type: {:?}", output);
     let fty = bx.cx.type_func(&argtys, output);
-    unsafe {
-        // Ask LLVM to verify that the constraints are well-formed.
-        let constraints_ok = llvm::LLVMRustInlineAsmVerify(fty, cons.as_c_char_ptr(), cons.len());
-        debug!("constraint verification result: {:?}", constraints_ok);
-        if constraints_ok {
-            let v = llvm::LLVMRustInlineAsm(
+    // Ask LLVM to verify that the constraints are well-formed.
+    let constraints_ok =
+        unsafe { llvm::LLVMRustInlineAsmVerify(fty, cons.as_c_char_ptr(), cons.len()) };
+    debug!("constraint verification result: {:?}", constraints_ok);
+    if constraints_ok {
+        let v = unsafe {
+            llvm::LLVMRustInlineAsm(
                 fty,
                 asm.as_c_char_ptr(),
                 asm.len(),
@@ -496,54 +498,50 @@ pub(crate) fn inline_asm_call<'ll>(
                 alignstack,
                 dia,
                 can_throw,
-            );
-
-            let call = if !labels.is_empty() {
-                assert!(catch_funclet.is_none());
-                bx.callbr(fty, None, None, v, inputs, dest.unwrap(), labels, None, None)
-            } else if let Some((catch, funclet)) = catch_funclet {
-                bx.invoke(fty, None, None, v, inputs, dest.unwrap(), catch, funclet, None)
-            } else {
-                bx.call(fty, None, None, v, inputs, None, None)
-            };
+            )
+        };
 
-            // Store mark in a metadata node so we can map LLVM errors
-            // back to source locations. See #17552.
-            let key = "srcloc";
-            let kind = llvm::LLVMGetMDKindIDInContext(
-                bx.llcx,
-                key.as_ptr().cast::<c_char>(),
-                key.len() as c_uint,
-            );
+        let call = if !labels.is_empty() {
+            assert!(catch_funclet.is_none());
+            bx.callbr(fty, None, None, v, inputs, dest.unwrap(), labels, None, None)
+        } else if let Some((catch, funclet)) = catch_funclet {
+            bx.invoke(fty, None, None, v, inputs, dest.unwrap(), catch, funclet, None)
+        } else {
+            bx.call(fty, None, None, v, inputs, None, None)
+        };
 
-            // `srcloc` contains one 64-bit integer for each line of assembly code,
-            // where the lower 32 bits hold the lo byte position and the upper 32 bits
-            // hold the hi byte position.
-            let mut srcloc = vec![];
-            if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 {
-                // LLVM inserts an extra line to add the ".intel_syntax", so add
-                // a dummy srcloc entry for it.
-                //
-                // Don't do this if we only have 1 line span since that may be
-                // due to the asm template string coming from a macro. LLVM will
-                // default to the first srcloc for lines that don't have an
-                // associated srcloc.
-                srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0)));
-            }
-            srcloc.extend(line_spans.iter().map(|span| {
-                llvm::LLVMValueAsMetadata(bx.const_u64(
-                    u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32),
-                ))
-            }));
-            let md = llvm::LLVMMDNodeInContext2(bx.llcx, srcloc.as_ptr(), srcloc.len());
-            let md = llvm::LLVMMetadataAsValue(&bx.llcx, md);
-            llvm::LLVMSetMetadata(call, kind, md);
+        // Store mark in a metadata node so we can map LLVM errors
+        // back to source locations. See #17552.
+        let key = "srcloc";
+        let kind = bx.get_md_kind_id(key);
 
-            Some(call)
-        } else {
-            // LLVM has detected an issue with our constraints, bail out
-            None
+        // `srcloc` contains one 64-bit integer for each line of assembly code,
+        // where the lower 32 bits hold the lo byte position and the upper 32 bits
+        // hold the hi byte position.
+        let mut srcloc = vec![];
+        if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 {
+            // LLVM inserts an extra line to add the ".intel_syntax", so add
+            // a dummy srcloc entry for it.
+            //
+            // Don't do this if we only have 1 line span since that may be
+            // due to the asm template string coming from a macro. LLVM will
+            // default to the first srcloc for lines that don't have an
+            // associated srcloc.
+            srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0)));
         }
+        srcloc.extend(line_spans.iter().map(|span| {
+            llvm::LLVMValueAsMetadata(
+                bx.const_u64(u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32)),
+            )
+        }));
+        let md = unsafe { llvm::LLVMMDNodeInContext2(bx.llcx, srcloc.as_ptr(), srcloc.len()) };
+        let md = bx.get_metadata_value(md);
+        llvm::LLVMSetMetadata(call, kind, md);
+
+        Some(call)
+    } else {
+        // LLVM has detected an issue with our constraints, bail out
+        None
     }
 }
 
@@ -937,9 +935,10 @@ fn llvm_fixup_input<'ll, 'tcx>(
             }
             bx.insert_element(bx.const_undef(vec_ty), value, bx.const_i32(0))
         }
-        (AArch64(AArch64InlineAsmRegClass::vreg_low16), BackendRepr::Vector { element, count })
-            if layout.size.bytes() == 8 =>
-        {
+        (
+            AArch64(AArch64InlineAsmRegClass::vreg_low16),
+            BackendRepr::SimdVector { element, count },
+        ) if layout.size.bytes() == 8 => {
             let elem_ty = llvm_asm_scalar_type(bx.cx, element);
             let vec_ty = bx.cx.type_vector(elem_ty, count);
             let indices: Vec<_> = (0..count * 2).map(|x| bx.const_i32(x as i32)).collect();
@@ -952,7 +951,7 @@ fn llvm_fixup_input<'ll, 'tcx>(
         }
         (
             X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
-            BackendRepr::Vector { .. },
+            BackendRepr::SimdVector { .. },
         ) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)),
         (
             X86(
@@ -987,7 +986,7 @@ fn llvm_fixup_input<'ll, 'tcx>(
                 | X86InlineAsmRegClass::ymm_reg
                 | X86InlineAsmRegClass::zmm_reg,
             ),
-            BackendRepr::Vector { element, count: count @ (8 | 16) },
+            BackendRepr::SimdVector { element, count: count @ (8 | 16) },
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
         }
@@ -1024,7 +1023,7 @@ fn llvm_fixup_input<'ll, 'tcx>(
                 | ArmInlineAsmRegClass::qreg_low4
                 | ArmInlineAsmRegClass::qreg_low8,
             ),
-            BackendRepr::Vector { element, count: count @ (4 | 8) },
+            BackendRepr::SimdVector { element, count: count @ (4 | 8) },
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
         }
@@ -1097,9 +1096,10 @@ fn llvm_fixup_output<'ll, 'tcx>(
             }
             value
         }
-        (AArch64(AArch64InlineAsmRegClass::vreg_low16), BackendRepr::Vector { element, count })
-            if layout.size.bytes() == 8 =>
-        {
+        (
+            AArch64(AArch64InlineAsmRegClass::vreg_low16),
+            BackendRepr::SimdVector { element, count },
+        ) if layout.size.bytes() == 8 => {
             let elem_ty = llvm_asm_scalar_type(bx.cx, element);
             let vec_ty = bx.cx.type_vector(elem_ty, count * 2);
             let indices: Vec<_> = (0..count).map(|x| bx.const_i32(x as i32)).collect();
@@ -1112,7 +1112,7 @@ fn llvm_fixup_output<'ll, 'tcx>(
         }
         (
             X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
-            BackendRepr::Vector { .. },
+            BackendRepr::SimdVector { .. },
         ) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)),
         (
             X86(
@@ -1143,7 +1143,7 @@ fn llvm_fixup_output<'ll, 'tcx>(
                 | X86InlineAsmRegClass::ymm_reg
                 | X86InlineAsmRegClass::zmm_reg,
             ),
-            BackendRepr::Vector { element, count: count @ (8 | 16) },
+            BackendRepr::SimdVector { element, count: count @ (8 | 16) },
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
         }
@@ -1180,7 +1180,7 @@ fn llvm_fixup_output<'ll, 'tcx>(
                 | ArmInlineAsmRegClass::qreg_low4
                 | ArmInlineAsmRegClass::qreg_low8,
             ),
-            BackendRepr::Vector { element, count: count @ (4 | 8) },
+            BackendRepr::SimdVector { element, count: count @ (4 | 8) },
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
         }
@@ -1241,9 +1241,10 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
             let count = 16 / layout.size.bytes();
             cx.type_vector(elem_ty, count)
         }
-        (AArch64(AArch64InlineAsmRegClass::vreg_low16), BackendRepr::Vector { element, count })
-            if layout.size.bytes() == 8 =>
-        {
+        (
+            AArch64(AArch64InlineAsmRegClass::vreg_low16),
+            BackendRepr::SimdVector { element, count },
+        ) if layout.size.bytes() == 8 => {
             let elem_ty = llvm_asm_scalar_type(cx, element);
             cx.type_vector(elem_ty, count * 2)
         }
@@ -1254,7 +1255,7 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
         }
         (
             X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
-            BackendRepr::Vector { .. },
+            BackendRepr::SimdVector { .. },
         ) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8),
         (
             X86(
@@ -1282,7 +1283,7 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
                 | X86InlineAsmRegClass::ymm_reg
                 | X86InlineAsmRegClass::zmm_reg,
             ),
-            BackendRepr::Vector { element, count: count @ (8 | 16) },
+            BackendRepr::SimdVector { element, count: count @ (8 | 16) },
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             cx.type_vector(cx.type_i16(), count)
         }
@@ -1319,7 +1320,7 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
                 | ArmInlineAsmRegClass::qreg_low4
                 | ArmInlineAsmRegClass::qreg_low8,
             ),
-            BackendRepr::Vector { element, count: count @ (4 | 8) },
+            BackendRepr::SimdVector { element, count: count @ (4 | 8) },
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             cx.type_vector(cx.type_i16(), count)
         }
diff --git a/compiler/rustc_codegen_llvm/src/back/archive.rs b/compiler/rustc_codegen_llvm/src/back/archive.rs
index 33a956e552f..0a161442933 100644
--- a/compiler/rustc_codegen_llvm/src/back/archive.rs
+++ b/compiler/rustc_codegen_llvm/src/back/archive.rs
@@ -11,7 +11,7 @@ use rustc_codegen_ssa::back::archive::{
 use rustc_session::Session;
 
 use crate::llvm::archive_ro::{ArchiveRO, Child};
-use crate::llvm::{self, ArchiveKind};
+use crate::llvm::{self, ArchiveKind, last_error};
 
 /// Helper for adding many files to an archive.
 #[must_use = "must call build() to finish building the archive"]
@@ -132,14 +132,33 @@ fn get_llvm_object_symbols(
     if err.is_null() {
         return Ok(true);
     } else {
-        return Err(unsafe { *Box::from_raw(err as *mut io::Error) });
+        let error = unsafe { *Box::from_raw(err as *mut io::Error) };
+        // These are the magic constants for LLVM bitcode files:
+        // https://github.com/llvm/llvm-project/blob/7eadc1960d199676f04add402bb0aa6f65b7b234/llvm/lib/BinaryFormat/Magic.cpp#L90-L97
+        if buf.starts_with(&[0xDE, 0xCE, 0x17, 0x0B]) || buf.starts_with(&[b'B', b'C', 0xC0, 0xDE])
+        {
+            // For LLVM bitcode, failure to read the symbols is not fatal. The bitcode may have been
+            // produced by a newer LLVM version that the one linked to rustc. This is fine provided
+            // that the linker does use said newer LLVM version. We skip writing the symbols for the
+            // bitcode to the symbol table of the archive. Traditional linkers don't like this, but
+            // newer linkers like lld, mold and wild ignore the symbol table anyway, so if they link
+            // against a new enough LLVM it will work out in the end.
+            // LLVM's archive writer also has this same behavior of only warning about invalid
+            // bitcode since https://github.com/llvm/llvm-project/pull/96848
+
+            // We don't have access to the DiagCtxt here to produce a nice warning in the correct format.
+            eprintln!("warning: Failed to read symbol table from LLVM bitcode: {}", error);
+            return Ok(true);
+        } else {
+            return Err(error);
+        }
     }
 
     unsafe extern "C" fn callback(state: *mut c_void, symbol_name: *const c_char) -> *mut c_void {
         let f = unsafe { &mut *(state as *mut &mut dyn FnMut(&[u8]) -> io::Result<()>) };
         match f(unsafe { CStr::from_ptr(symbol_name) }.to_bytes()) {
             Ok(()) => std::ptr::null_mut(),
-            Err(err) => Box::into_raw(Box::new(err)) as *mut c_void,
+            Err(err) => Box::into_raw(Box::new(err) as Box<io::Error>) as *mut c_void,
         }
     }
 
@@ -148,7 +167,7 @@ fn get_llvm_object_symbols(
         Box::into_raw(Box::new(io::Error::new(
             io::ErrorKind::Other,
             format!("LLVM error: {}", error.to_string_lossy()),
-        ))) as *mut c_void
+        )) as Box<io::Error>) as *mut c_void
     }
 }
 
@@ -169,6 +188,8 @@ impl<'a> LlvmArchiveBuilder<'a> {
             .unwrap_or_else(|kind| self.sess.dcx().emit_fatal(UnknownArchiveKind { kind }));
 
         let mut additions = mem::take(&mut self.additions);
+        // Values in the `members` list below will contain pointers to the strings allocated here.
+        // So they need to get dropped after all elements of `members` get freed.
         let mut strings = Vec::new();
         let mut members = Vec::new();
 
@@ -229,12 +250,7 @@ impl<'a> LlvmArchiveBuilder<'a> {
                 self.sess.target.arch == "arm64ec",
             );
             let ret = if r.into_result().is_err() {
-                let err = llvm::LLVMRustGetLastError();
-                let msg = if err.is_null() {
-                    "failed to write archive".into()
-                } else {
-                    String::from_utf8_lossy(CStr::from_ptr(err).to_bytes())
-                };
+                let msg = last_error().unwrap_or_else(|| "failed to write archive".into());
                 Err(io::Error::new(io::ErrorKind::Other, msg))
             } else {
                 Ok(!members.is_empty())
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 78c759bbe8c..a8b49e9552c 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -2,6 +2,7 @@ use std::collections::BTreeMap;
 use std::ffi::{CStr, CString};
 use std::fs::File;
 use std::path::Path;
+use std::ptr::NonNull;
 use std::sync::Arc;
 use std::{io, iter, slice};
 
@@ -305,11 +306,8 @@ fn fat_lto(
             assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
             let (buffer, name) = serialized_modules.remove(0);
             info!("no in-memory regular modules to choose from, parsing {:?}", name);
-            ModuleCodegen {
-                module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?,
-                name: name.into_string().unwrap(),
-                kind: ModuleKind::Regular,
-            }
+            let llvm_module = ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?;
+            ModuleCodegen::new_regular(name.into_string().unwrap(), llvm_module)
         }
     };
     {
@@ -362,8 +360,8 @@ fn fat_lto(
                 ptr as *const *const libc::c_char,
                 symbols_below_threshold.len() as libc::size_t,
             );
-            save_temp_bitcode(cgcx, &module, "lto.after-restriction");
         }
+        save_temp_bitcode(cgcx, &module, "lto.after-restriction");
     }
 
     Ok(LtoModuleCodegen::Fat(module))
@@ -586,6 +584,44 @@ fn thin_lto(
     }
 }
 
+fn enable_autodiff_settings(ad: &[config::AutoDiff], module: &mut ModuleCodegen<ModuleLlvm>) {
+    for &val in ad {
+        match val {
+            config::AutoDiff::PrintModBefore => {
+                unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
+            }
+            config::AutoDiff::PrintPerf => {
+                llvm::set_print_perf(true);
+            }
+            config::AutoDiff::PrintAA => {
+                llvm::set_print_activity(true);
+            }
+            config::AutoDiff::PrintTA => {
+                llvm::set_print_type(true);
+            }
+            config::AutoDiff::Inline => {
+                llvm::set_inline(true);
+            }
+            config::AutoDiff::LooseTypes => {
+                llvm::set_loose_types(false);
+            }
+            config::AutoDiff::PrintSteps => {
+                llvm::set_print(true);
+            }
+            // We handle this below
+            config::AutoDiff::PrintModAfter => {}
+            // We handle this below
+            config::AutoDiff::PrintModFinal => {}
+            // This is required and already checked
+            config::AutoDiff::Enable => {}
+        }
+    }
+    // This helps with handling enums for now.
+    llvm::set_strict_aliasing(false);
+    // FIXME(ZuseZ4): Test this, since it was added a long time ago.
+    llvm::set_rust_rules(true);
+}
+
 pub(crate) fn run_pass_manager(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
@@ -604,13 +640,43 @@ pub(crate) fn run_pass_manager(
     let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
     let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
 
-    // If this rustc version was build with enzyme/autodiff enabled, and if users applied the
-    // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
-    let first_run = true;
-    debug!("running llvm pm opt pipeline");
+    // The PostAD behavior is the same that we would have if no autodiff was used.
+    // It will run the default optimization pipeline. If AD is enabled we select
+    // the DuringAD stage, which will disable vectorization and loop unrolling, and
+    // schedule two autodiff optimization + differentiation passes.
+    // We then run the llvm_optimize function a second time, to optimize the code which we generated
+    // in the enzyme differentiation pass.
+    let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable);
+    let stage =
+        if enable_ad { write::AutodiffStage::DuringAD } else { write::AutodiffStage::PostAD };
+
+    if enable_ad {
+        enable_autodiff_settings(&config.autodiff, module);
+    }
+
     unsafe {
-        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?;
+        write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
     }
+
+    if cfg!(llvm_enzyme) && enable_ad {
+        // This is the post-autodiff IR, mainly used for testing and educational purposes.
+        if config.autodiff.contains(&config::AutoDiff::PrintModAfter) {
+            unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
+        }
+
+        let opt_stage = llvm::OptStage::FatLTO;
+        let stage = write::AutodiffStage::PostAD;
+        unsafe {
+            write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
+        }
+
+        // This is the final IR, so people should be able to inspect the optimized autodiff output,
+        // for manual inspection.
+        if config.autodiff.contains(&config::AutoDiff::PrintModFinal) {
+            unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
+        }
+    }
+
     debug!("lto done");
     Ok(())
 }
@@ -621,7 +687,7 @@ unsafe impl Send for ModuleBuffer {}
 unsafe impl Sync for ModuleBuffer {}
 
 impl ModuleBuffer {
-    pub fn new(m: &llvm::Module) -> ModuleBuffer {
+    pub(crate) fn new(m: &llvm::Module) -> ModuleBuffer {
         ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
     }
 }
@@ -663,12 +729,17 @@ unsafe impl Send for ThinBuffer {}
 unsafe impl Sync for ThinBuffer {}
 
 impl ThinBuffer {
-    pub fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer {
+    pub(crate) fn new(m: &llvm::Module, is_thin: bool, emit_summary: bool) -> ThinBuffer {
         unsafe {
             let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin, emit_summary);
             ThinBuffer(buffer)
         }
     }
+
+    pub(crate) unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
+        let mut ptr = NonNull::new(ptr).unwrap();
+        ThinBuffer(unsafe { ptr.as_mut() })
+    }
 }
 
 impl ThinBufferMethods for ThinBuffer {
@@ -712,11 +783,11 @@ pub(crate) unsafe fn optimize_thin_module(
     // crates but for locally codegened modules we may be able to reuse
     // that LLVM Context and Module.
     let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx)?;
-    let mut module = ModuleCodegen {
-        module_llvm,
-        name: thin_module.name().to_string(),
-        kind: ModuleKind::Regular,
-    };
+    let mut module = ModuleCodegen::new_regular(thin_module.name(), module_llvm);
+    // Given that the newly created module lacks a thinlto buffer for embedding, we need to re-add it here.
+    if cgcx.config(ModuleKind::Regular).embed_bitcode() {
+        module.thin_lto_buffer = Some(thin_module.data().to_vec());
+    }
     {
         let target = &*module.module_llvm.tm;
         let llmod = module.module_llvm.llmod();
@@ -733,7 +804,9 @@ pub(crate) unsafe fn optimize_thin_module(
         {
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
-            unsafe { llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) };
+            unsafe {
+                llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target.raw())
+            };
             save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
         }
 
@@ -763,7 +836,7 @@ pub(crate) unsafe fn optimize_thin_module(
             let _timer =
                 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
             if unsafe {
-                !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target)
+                !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target.raw())
             } {
                 return Err(write::llvm_err(dcx, LlvmError::PrepareThinLtoModule));
             }
diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
index 4cbd49aa44d..dfde4595590 100644
--- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
+++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
@@ -1,6 +1,5 @@
 use std::ffi::{CStr, c_char};
 use std::marker::PhantomData;
-use std::ops::Deref;
 use std::ptr::NonNull;
 
 use rustc_data_structures::small_c_str::SmallCStr;
@@ -17,7 +16,7 @@ pub struct OwnedTargetMachine {
 }
 
 impl OwnedTargetMachine {
-    pub fn new(
+    pub(crate) fn new(
         triple: &CStr,
         cpu: &CStr,
         features: &CStr,
@@ -80,12 +79,12 @@ impl OwnedTargetMachine {
             .map(|tm_unique| Self { tm_unique, phantom: PhantomData })
             .ok_or_else(|| LlvmError::CreateTargetMachine { triple: SmallCStr::from(triple) })
     }
-}
-
-impl Deref for OwnedTargetMachine {
-    type Target = llvm::TargetMachine;
 
-    fn deref(&self) -> &Self::Target {
+    /// Returns inner `llvm::TargetMachine` type.
+    ///
+    /// This could be a `Deref` implementation, but `llvm::TargetMachine` is an extern type and
+    /// `Deref::Target: ?Sized`.
+    pub fn raw(&self) -> &llvm::TargetMachine {
         // SAFETY: constructing ensures we have a valid pointer created by
         // llvm::LLVMRustCreateTargetMachine.
         unsafe { self.tm_unique.as_ref() }
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 4706744f353..bf6138142b6 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -1,6 +1,7 @@
 use std::ffi::{CStr, CString};
 use std::io::{self, Write};
 use std::path::{Path, PathBuf};
+use std::ptr::null_mut;
 use std::sync::Arc;
 use std::{fs, slice, str};
 
@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
     TargetMachineFactoryFn,
 };
 use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
+use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -40,7 +41,7 @@ use crate::errors::{
     WithLlvmError, WriteBytecode,
 };
 use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
-use crate::llvm::{self, DiagnosticInfo, PassManager};
+use crate::llvm::{self, DiagnosticInfo};
 use crate::type_::Type;
 use crate::{LlvmCodegenBackend, ModuleLlvm, base, common, llvm_util};
 
@@ -54,7 +55,7 @@ pub(crate) fn llvm_err<'a>(dcx: DiagCtxtHandle<'_>, err: LlvmError<'a>) -> Fatal
 fn write_output_file<'ll>(
     dcx: DiagCtxtHandle<'_>,
     target: &'ll llvm::TargetMachine,
-    pm: &llvm::PassManager<'ll>,
+    no_builtins: bool,
     m: &'ll llvm::Module,
     output: &Path,
     dwo_output: Option<&Path>,
@@ -63,16 +64,19 @@ fn write_output_file<'ll>(
     verify_llvm_ir: bool,
 ) -> Result<(), FatalError> {
     debug!("write_output_file output={:?} dwo_output={:?}", output, dwo_output);
-    unsafe {
-        let output_c = path_to_c_string(output);
-        let dwo_output_c;
-        let dwo_output_ptr = if let Some(dwo_output) = dwo_output {
-            dwo_output_c = path_to_c_string(dwo_output);
-            dwo_output_c.as_ptr()
-        } else {
-            std::ptr::null()
-        };
-        let result = llvm::LLVMRustWriteOutputFile(
+    let output_c = path_to_c_string(output);
+    let dwo_output_c;
+    let dwo_output_ptr = if let Some(dwo_output) = dwo_output {
+        dwo_output_c = path_to_c_string(dwo_output);
+        dwo_output_c.as_ptr()
+    } else {
+        std::ptr::null()
+    };
+    let result = unsafe {
+        let pm = llvm::LLVMCreatePassManager();
+        llvm::LLVMAddAnalysisPasses(target, pm);
+        llvm::LLVMRustAddLibraryInfo(pm, m, no_builtins);
+        llvm::LLVMRustWriteOutputFile(
             target,
             pm,
             m,
@@ -80,22 +84,22 @@ fn write_output_file<'ll>(
             dwo_output_ptr,
             file_type,
             verify_llvm_ir,
-        );
+        )
+    };
 
-        // Record artifact sizes for self-profiling
-        if result == llvm::LLVMRustResult::Success {
-            let artifact_kind = match file_type {
-                llvm::FileType::ObjectFile => "object_file",
-                llvm::FileType::AssemblyFile => "assembly_file",
-            };
-            record_artifact_size(self_profiler_ref, artifact_kind, output);
-            if let Some(dwo_file) = dwo_output {
-                record_artifact_size(self_profiler_ref, "dwo_file", dwo_file);
-            }
+    // Record artifact sizes for self-profiling
+    if result == llvm::LLVMRustResult::Success {
+        let artifact_kind = match file_type {
+            llvm::FileType::ObjectFile => "object_file",
+            llvm::FileType::AssemblyFile => "assembly_file",
+        };
+        record_artifact_size(self_profiler_ref, artifact_kind, output);
+        if let Some(dwo_file) = dwo_output {
+            record_artifact_size(self_profiler_ref, "dwo_file", dwo_file);
         }
-
-        result.into_result().map_err(|()| llvm_err(dcx, LlvmError::WriteOutput { path: output }))
     }
+
+    result.into_result().map_err(|()| llvm_err(dcx, LlvmError::WriteOutput { path: output }))
 }
 
 pub(crate) fn create_informational_target_machine(
@@ -138,7 +142,7 @@ fn to_llvm_opt_settings(cfg: config::OptLevel) -> (llvm::CodeGenOptLevel, llvm::
     match cfg {
         No => (llvm::CodeGenOptLevel::None, llvm::CodeGenOptSizeNone),
         Less => (llvm::CodeGenOptLevel::Less, llvm::CodeGenOptSizeNone),
-        Default => (llvm::CodeGenOptLevel::Default, llvm::CodeGenOptSizeNone),
+        More => (llvm::CodeGenOptLevel::Default, llvm::CodeGenOptSizeNone),
         Aggressive => (llvm::CodeGenOptLevel::Aggressive, llvm::CodeGenOptSizeNone),
         Size => (llvm::CodeGenOptLevel::Default, llvm::CodeGenOptSizeDefault),
         SizeMin => (llvm::CodeGenOptLevel::Default, llvm::CodeGenOptSizeAggressive),
@@ -150,7 +154,7 @@ fn to_pass_builder_opt_level(cfg: config::OptLevel) -> llvm::PassBuilderOptLevel
     match cfg {
         No => llvm::PassBuilderOptLevel::O0,
         Less => llvm::PassBuilderOptLevel::O1,
-        Default => llvm::PassBuilderOptLevel::O2,
+        More => llvm::PassBuilderOptLevel::O2,
         Aggressive => llvm::PassBuilderOptLevel::O3,
         Size => llvm::PassBuilderOptLevel::Os,
         SizeMin => llvm::PassBuilderOptLevel::Oz,
@@ -325,13 +329,17 @@ pub(crate) fn save_temp_bitcode(
     if !cgcx.save_temps {
         return;
     }
+    let ext = format!("{name}.bc");
+    let cgu = Some(&module.name[..]);
+    let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
+    write_bitcode_to_file(module, &path)
+}
+
+fn write_bitcode_to_file(module: &ModuleCodegen<ModuleLlvm>, path: &Path) {
     unsafe {
-        let ext = format!("{name}.bc");
-        let cgu = Some(&module.name[..]);
-        let path = cgcx.output_filenames.temp_path_ext(&ext, cgu);
-        let cstr = path_to_c_string(&path);
+        let path = path_to_c_string(&path);
         let llmod = module.module_llvm.llmod();
-        llvm::LLVMWriteBitcodeToFile(llmod, cstr.as_ptr());
+        llvm::LLVMWriteBitcodeToFile(llmod, path.as_ptr());
     }
 }
 
@@ -530,14 +538,25 @@ fn get_instr_profile_output_path(config: &ModuleConfig) -> Option<CString> {
     config.instrument_coverage.then(|| c"default_%m_%p.profraw".to_owned())
 }
 
+// PreAD will run llvm opts but disable size increasing opts (vectorization, loop unrolling)
+// DuringAD is the same as above, but also runs the enzyme opt and autodiff passes.
+// PostAD will run all opts, including size increasing opts.
+#[derive(Debug, Eq, PartialEq)]
+pub(crate) enum AutodiffStage {
+    PreAD,
+    DuringAD,
+    PostAD,
+}
+
 pub(crate) unsafe fn llvm_optimize(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
     module: &ModuleCodegen<ModuleLlvm>,
+    thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
-    skip_size_increasing_opts: bool,
+    autodiff_stage: AutodiffStage,
 ) -> Result<(), FatalError> {
     // Enzyme:
     // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized
@@ -547,15 +566,16 @@ pub(crate) unsafe fn llvm_optimize(
     // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
     // differentiated.
 
+    let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
+    let run_enzyme = autodiff_stage == AutodiffStage::DuringAD;
     let unroll_loops;
     let vectorize_slp;
     let vectorize_loop;
 
     // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
-    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
-    // we should make this more granular, or at least check that the user has at least one autodiff
-    // call in their code, to justify altering the compilation pipeline.
-    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+    // optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt).
+    // We therefore have two calls to llvm_optimize, if autodiff is used.
+    if consider_ad && autodiff_stage != AutodiffStage::PostAD {
         unroll_loops = false;
         vectorize_slp = false;
         vectorize_loop = false;
@@ -565,8 +585,18 @@ pub(crate) unsafe fn llvm_optimize(
         vectorize_slp = config.vectorize_slp;
         vectorize_loop = config.vectorize_loop;
     }
-    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
-    let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
+    if thin_lto_buffer.is_some() {
+        assert!(
+            matches!(
+                opt_stage,
+                llvm::OptStage::PreLinkNoLTO
+                    | llvm::OptStage::PreLinkFatLTO
+                    | llvm::OptStage::PreLinkThinLTO
+            ),
+            "the bitcode for LTO can only be obtained at the pre-link stage"
+        );
+    }
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
     let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -619,20 +649,23 @@ pub(crate) unsafe fn llvm_optimize(
     let result = unsafe {
         llvm::LLVMRustOptimize(
             module.module_llvm.llmod(),
-            &*module.module_llvm.tm,
+            &*module.module_llvm.tm.raw(),
             to_pass_builder_opt_level(opt_level),
             opt_stage,
             cgcx.opts.cg.linker_plugin_lto.enabled(),
             config.no_prepopulate_passes,
             config.verify_llvm_ir,
             config.lint_llvm_ir,
-            using_thin_buffers,
+            thin_lto_buffer,
+            config.emit_thin_lto,
+            config.emit_thin_lto_summary,
             config.merge_functions,
             unroll_loops,
             vectorize_slp,
             vectorize_loop,
             config.no_builtins,
             config.emit_lifetime_markers,
+            run_enzyme,
             sanitizer_options.as_ref(),
             pgo_gen_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
             pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
@@ -656,12 +689,11 @@ pub(crate) unsafe fn llvm_optimize(
 pub(crate) unsafe fn optimize(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
-    module: &ModuleCodegen<ModuleLlvm>,
+    module: &mut ModuleCodegen<ModuleLlvm>,
     config: &ModuleConfig,
 ) -> Result<(), FatalError> {
     let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
 
-    let llmod = module.module_llvm.llmod();
     let llcx = &*module.module_llvm.llcx;
     let _handlers = DiagnosticHandlers::new(cgcx, dcx, llcx, module, CodegenDiagnosticsStage::Opt);
 
@@ -670,8 +702,7 @@ pub(crate) unsafe fn optimize(
 
     if config.emit_no_opt_bc {
         let out = cgcx.output_filenames.temp_path_ext("no-opt.bc", module_name);
-        let out = path_to_c_string(&out);
-        unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
+        write_bitcode_to_file(module, &out)
     }
 
     // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
@@ -684,19 +715,57 @@ pub(crate) unsafe fn optimize(
             _ => llvm::OptStage::PreLinkNoLTO,
         };
 
-        // If we know that we will later run AD, then we disable vectorization and loop unrolling
-        let skip_size_increasing_opts = cfg!(llvm_enzyme);
-        return unsafe {
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling.
+        // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
+        let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
+        let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
+        // The embedded bitcode is used to run LTO/ThinLTO.
+        // The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
+        // It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
+        // this point.
+        let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
+            && config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
+            || config.emit_thin_lto_summary
+        {
+            Some(null_mut())
+        } else {
+            None
+        };
+        unsafe {
             llvm_optimize(
                 cgcx,
                 dcx,
                 module,
+                thin_lto_buffer.as_mut(),
                 config,
                 opt_level,
                 opt_stage,
-                skip_size_increasing_opts,
+                autodiff_stage,
             )
-        };
+        }?;
+        if let Some(thin_lto_buffer) = thin_lto_buffer {
+            let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
+            module.thin_lto_buffer = Some(thin_lto_buffer.data().to_vec());
+            let bc_summary_out =
+                cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
+            if config.emit_thin_lto_summary
+                && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
+            {
+                let summary_data = thin_lto_buffer.thin_link_data();
+                cgcx.prof.artifact_size(
+                    "llvm_bitcode_summary",
+                    thin_link_bitcode_filename.to_string_lossy(),
+                    summary_data.len() as u64,
+                );
+                let _timer = cgcx.prof.generic_activity_with_arg(
+                    "LLVM_module_codegen_emit_bitcode_summary",
+                    &*module.name,
+                );
+                if let Err(err) = fs::write(&bc_summary_out, summary_data) {
+                    dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
+                }
+            }
+        }
     }
     Ok(())
 }
@@ -744,89 +813,46 @@ pub(crate) unsafe fn codegen(
             create_msvc_imps(cgcx, llcx, llmod);
         }
 
-        // A codegen-specific pass manager is used to generate object
-        // files for an LLVM module.
-        //
-        // Apparently each of these pass managers is a one-shot kind of
-        // thing, so we create a new one for each type of output. The
-        // pass manager passed to the closure should be ensured to not
-        // escape the closure itself, and the manager should only be
-        // used once.
-        unsafe fn with_codegen<'ll, F, R>(
-            tm: &'ll llvm::TargetMachine,
-            llmod: &'ll llvm::Module,
-            no_builtins: bool,
-            f: F,
-        ) -> R
-        where
-            F: FnOnce(&'ll mut PassManager<'ll>) -> R,
-        {
-            unsafe {
-                let cpm = llvm::LLVMCreatePassManager();
-                llvm::LLVMAddAnalysisPasses(tm, cpm);
-                llvm::LLVMRustAddLibraryInfo(cpm, llmod, no_builtins);
-                f(cpm)
-            }
-        }
-
         // Note that if object files are just LLVM bitcode we write bitcode,
         // copy it to the .o file, and delete the bitcode if it wasn't
         // otherwise requested.
 
         let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
-        let bc_summary_out =
-            cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
         let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
 
         if config.bitcode_needed() {
-            let _timer = cgcx
-                .prof
-                .generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
-            let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
-            let data = thin.data();
-
-            if let Some(bitcode_filename) = bc_out.file_name() {
-                cgcx.prof.artifact_size(
-                    "llvm_bitcode",
-                    bitcode_filename.to_string_lossy(),
-                    data.len() as u64,
-                );
-            }
-
-            if config.emit_thin_lto_summary
-                && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
-            {
-                let summary_data = thin.thin_link_data();
-                cgcx.prof.artifact_size(
-                    "llvm_bitcode_summary",
-                    thin_link_bitcode_filename.to_string_lossy(),
-                    summary_data.len() as u64,
-                );
-
-                let _timer = cgcx.prof.generic_activity_with_arg(
-                    "LLVM_module_codegen_emit_bitcode_summary",
-                    &*module.name,
-                );
-                if let Err(err) = fs::write(&bc_summary_out, summary_data) {
-                    dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
-                }
-            }
-
             if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
+                let thin = {
+                    let _timer = cgcx.prof.generic_activity_with_arg(
+                        "LLVM_module_codegen_make_bitcode",
+                        &*module.name,
+                    );
+                    ThinBuffer::new(llmod, config.emit_thin_lto, false)
+                };
+                let data = thin.data();
                 let _timer = cgcx
                     .prof
                     .generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
+                if let Some(bitcode_filename) = bc_out.file_name() {
+                    cgcx.prof.artifact_size(
+                        "llvm_bitcode",
+                        bitcode_filename.to_string_lossy(),
+                        data.len() as u64,
+                    );
+                }
                 if let Err(err) = fs::write(&bc_out, data) {
                     dcx.emit_err(WriteBytecode { path: &bc_out, err });
                 }
             }
 
-            if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
+            if config.embed_bitcode() && module.kind == ModuleKind::Regular {
                 let _timer = cgcx
                     .prof
                     .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
+                let thin_bc =
+                    module.thin_lto_buffer.as_deref().expect("cannot find embedded bitcode");
                 unsafe {
-                    embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
+                    embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &thin_bc);
                 }
             }
         }
@@ -887,21 +913,17 @@ pub(crate) unsafe fn codegen(
             } else {
                 llmod
             };
-            unsafe {
-                with_codegen(tm, llmod, config.no_builtins, |cpm| {
-                    write_output_file(
-                        dcx,
-                        tm,
-                        cpm,
-                        llmod,
-                        &path,
-                        None,
-                        llvm::FileType::AssemblyFile,
-                        &cgcx.prof,
-                        config.verify_llvm_ir,
-                    )
-                })?;
-            }
+            write_output_file(
+                dcx,
+                tm.raw(),
+                config.no_builtins,
+                llmod,
+                &path,
+                None,
+                llvm::FileType::AssemblyFile,
+                &cgcx.prof,
+                config.verify_llvm_ir,
+            )?;
         }
 
         match config.emit_obj {
@@ -925,21 +947,17 @@ pub(crate) unsafe fn codegen(
                     (_, SplitDwarfKind::Split) => Some(dwo_out.as_path()),
                 };
 
-                unsafe {
-                    with_codegen(tm, llmod, config.no_builtins, |cpm| {
-                        write_output_file(
-                            dcx,
-                            tm,
-                            cpm,
-                            llmod,
-                            &obj_out,
-                            dwo_out,
-                            llvm::FileType::ObjectFile,
-                            &cgcx.prof,
-                            config.verify_llvm_ir,
-                        )
-                    })?;
-                }
+                write_output_file(
+                    dcx,
+                    tm.raw(),
+                    config.no_builtins,
+                    llmod,
+                    &obj_out,
+                    dwo_out,
+                    llvm::FileType::ObjectFile,
+                    &cgcx.prof,
+                    config.verify_llvm_ir,
+                )?;
             }
 
             EmitObj::Bitcode => {
@@ -1006,7 +1024,7 @@ fn create_section_with_flags_asm(section_name: &str, section_flags: &str, data:
 }
 
 pub(crate) fn bitcode_section_name(cgcx: &CodegenContext<LlvmCodegenBackend>) -> &'static CStr {
-    if cgcx.target_is_like_osx {
+    if cgcx.target_is_like_darwin {
         c"__LLVM,__bitcode"
     } else if cgcx.target_is_like_aix {
         c".ipa"
@@ -1059,32 +1077,26 @@ unsafe fn embed_bitcode(
     // and COFF we emit the sections using module level inline assembly for that
     // reason (see issue #90326 for historical background).
     unsafe {
-        if cgcx.target_is_like_osx
+        if cgcx.target_is_like_darwin
             || cgcx.target_is_like_aix
             || cgcx.target_arch == "wasm32"
             || cgcx.target_arch == "wasm64"
         {
             // We don't need custom section flags, create LLVM globals.
             let llconst = common::bytes_in_context(llcx, bitcode);
-            let llglobal = llvm::LLVMAddGlobal(
-                llmod,
-                common::val_ty(llconst),
-                c"rustc.embedded.module".as_ptr(),
-            );
-            llvm::LLVMSetInitializer(llglobal, llconst);
+            let llglobal =
+                llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.module");
+            llvm::set_initializer(llglobal, llconst);
 
             llvm::set_section(llglobal, bitcode_section_name(cgcx));
             llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage);
             llvm::LLVMSetGlobalConstant(llglobal, llvm::True);
 
             let llconst = common::bytes_in_context(llcx, cmdline.as_bytes());
-            let llglobal = llvm::LLVMAddGlobal(
-                llmod,
-                common::val_ty(llconst),
-                c"rustc.embedded.cmdline".as_ptr(),
-            );
-            llvm::LLVMSetInitializer(llglobal, llconst);
-            let section = if cgcx.target_is_like_osx {
+            let llglobal =
+                llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.cmdline");
+            llvm::set_initializer(llglobal, llconst);
+            let section = if cgcx.target_is_like_darwin {
                 c"__LLVM,__cmdline"
             } else if cgcx.target_is_like_aix {
                 c".info"
@@ -1123,31 +1135,29 @@ fn create_msvc_imps(
     // underscores added in front).
     let prefix = if cgcx.target_arch == "x86" { "\x01__imp__" } else { "\x01__imp_" };
 
-    unsafe {
-        let ptr_ty = Type::ptr_llcx(llcx);
-        let globals = base::iter_globals(llmod)
-            .filter(|&val| {
-                llvm::get_linkage(val) == llvm::Linkage::ExternalLinkage
-                    && llvm::LLVMIsDeclaration(val) == 0
-            })
-            .filter_map(|val| {
-                // Exclude some symbols that we know are not Rust symbols.
-                let name = llvm::get_value_name(val);
-                if ignored(name) { None } else { Some((val, name)) }
-            })
-            .map(move |(val, name)| {
-                let mut imp_name = prefix.as_bytes().to_vec();
-                imp_name.extend(name);
-                let imp_name = CString::new(imp_name).unwrap();
-                (imp_name, val)
-            })
-            .collect::<Vec<_>>();
+    let ptr_ty = Type::ptr_llcx(llcx);
+    let globals = base::iter_globals(llmod)
+        .filter(|&val| {
+            llvm::get_linkage(val) == llvm::Linkage::ExternalLinkage && !llvm::is_declaration(val)
+        })
+        .filter_map(|val| {
+            // Exclude some symbols that we know are not Rust symbols.
+            let name = llvm::get_value_name(val);
+            if ignored(name) { None } else { Some((val, name)) }
+        })
+        .map(move |(val, name)| {
+            let mut imp_name = prefix.as_bytes().to_vec();
+            imp_name.extend(name);
+            let imp_name = CString::new(imp_name).unwrap();
+            (imp_name, val)
+        })
+        .collect::<Vec<_>>();
 
-        for (imp_name, val) in globals {
-            let imp = llvm::LLVMAddGlobal(llmod, ptr_ty, imp_name.as_ptr());
-            llvm::LLVMSetInitializer(imp, val);
-            llvm::set_linkage(imp, llvm::Linkage::ExternalLinkage);
-        }
+    for (imp_name, val) in globals {
+        let imp = llvm::add_global(llmod, ptr_ty, &imp_name);
+
+        llvm::set_initializer(imp, val);
+        llvm::set_linkage(imp, llvm::Linkage::ExternalLinkage);
     }
 
     // Use this function to exclude certain symbols from `__imp` generation.
diff --git a/compiler/rustc_codegen_llvm/src/base.rs b/compiler/rustc_codegen_llvm/src/base.rs
index d05faf5577b..6bd27914dbd 100644
--- a/compiler/rustc_codegen_llvm/src/base.rs
+++ b/compiler/rustc_codegen_llvm/src/base.rs
@@ -13,10 +13,10 @@
 
 use std::time::Instant;
 
+use rustc_codegen_ssa::ModuleCodegen;
 use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
 use rustc_codegen_ssa::mono_item::MonoItemExt;
 use rustc_codegen_ssa::traits::*;
-use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_middle::dep_graph;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
@@ -133,11 +133,7 @@ pub(crate) fn compile_codegen_unit(
             }
         }
 
-        ModuleCodegen {
-            name: cgu_name.to_string(),
-            module_llvm: llvm_module,
-            kind: ModuleKind::Regular,
-        }
+        ModuleCodegen::new_regular(cgu_name.to_string(), llvm_module)
     }
 
     (module, cost)
@@ -157,9 +153,7 @@ pub(crate) fn linkage_to_llvm(linkage: Linkage) -> llvm::Linkage {
         Linkage::LinkOnceODR => llvm::Linkage::LinkOnceODRLinkage,
         Linkage::WeakAny => llvm::Linkage::WeakAnyLinkage,
         Linkage::WeakODR => llvm::Linkage::WeakODRLinkage,
-        Linkage::Appending => llvm::Linkage::AppendingLinkage,
         Linkage::Internal => llvm::Linkage::InternalLinkage,
-        Linkage::Private => llvm::Linkage::PrivateLinkage,
         Linkage::ExternalWeak => llvm::Linkage::ExternalWeakLinkage,
         Linkage::Common => llvm::Linkage::CommonLinkage,
     }
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 264d43c6d46..297f104d124 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -4,7 +4,7 @@ use std::{iter, ptr};
 
 pub(crate) mod autodiff;
 
-use libc::{c_char, c_uint};
+use libc::{c_char, c_uint, size_t};
 use rustc_abi as abi;
 use rustc_abi::{Align, Size, WrappingRange};
 use rustc_codegen_ssa::MemFlags;
@@ -14,6 +14,7 @@ use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_hir::def_id::DefId;
+use rustc_middle::bug;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{
     FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers,
@@ -29,24 +30,26 @@ use smallvec::SmallVec;
 use tracing::{debug, instrument};
 
 use crate::abi::FnAbiLlvmExt;
-use crate::attributes;
 use crate::common::Funclet;
-use crate::context::{CodegenCx, SimpleCx};
-use crate::llvm::{self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, True};
+use crate::context::{CodegenCx, FullCx, GenericCx, SCx};
+use crate::llvm::{
+    self, AtomicOrdering, AtomicRmwBinOp, BasicBlock, False, GEPNoWrapFlags, Metadata, True,
+};
 use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
+use crate::{attributes, llvm_util};
 
 #[must_use]
-pub(crate) struct GenericBuilder<'a, 'll, CX: Borrow<SimpleCx<'ll>>> {
+pub(crate) struct GenericBuilder<'a, 'll, CX: Borrow<SCx<'ll>>> {
     pub llbuilder: &'ll mut llvm::Builder<'ll>,
-    pub cx: &'a CX,
+    pub cx: &'a GenericCx<'ll, CX>,
 }
 
-pub(crate) type SBuilder<'a, 'll> = GenericBuilder<'a, 'll, SimpleCx<'ll>>;
-pub(crate) type Builder<'a, 'll, 'tcx> = GenericBuilder<'a, 'll, CodegenCx<'ll, 'tcx>>;
+pub(crate) type SBuilder<'a, 'll> = GenericBuilder<'a, 'll, SCx<'ll>>;
+pub(crate) type Builder<'a, 'll, 'tcx> = GenericBuilder<'a, 'll, FullCx<'ll, 'tcx>>;
 
-impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> Drop for GenericBuilder<'a, 'll, CX> {
+impl<'a, 'll, CX: Borrow<SCx<'ll>>> Drop for GenericBuilder<'a, 'll, CX> {
     fn drop(&mut self) {
         unsafe {
             llvm::LLVMDisposeBuilder(&mut *(self.llbuilder as *mut _));
@@ -55,7 +58,7 @@ impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> Drop for GenericBuilder<'a, 'll, CX> {
 }
 
 impl<'a, 'll> SBuilder<'a, 'll> {
-    fn call(
+    pub(crate) fn call(
         &mut self,
         llty: &'ll Type,
         llfn: &'ll Value,
@@ -85,79 +88,36 @@ impl<'a, 'll> SBuilder<'a, 'll> {
         };
         call
     }
+}
 
-    fn with_scx(scx: &'a SimpleCx<'ll>) -> Self {
+impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
+    fn with_cx(scx: &'a GenericCx<'ll, CX>) -> Self {
         // Create a fresh builder from the simple context.
-        let llbuilder = unsafe { llvm::LLVMCreateBuilderInContext(scx.llcx) };
-        SBuilder { llbuilder, cx: scx }
+        let llbuilder = unsafe { llvm::LLVMCreateBuilderInContext(scx.deref().borrow().llcx) };
+        GenericBuilder { llbuilder, cx: scx }
     }
-}
-impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
+
     pub(crate) fn bitcast(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMBuildBitCast(self.llbuilder, val, dest_ty, UNNAMED) }
     }
 
-    fn ret_void(&mut self) {
-        unsafe {
-            llvm::LLVMBuildRetVoid(self.llbuilder);
-        }
+    pub(crate) fn ret_void(&mut self) {
+        llvm::LLVMBuildRetVoid(self.llbuilder);
     }
 
-    fn ret(&mut self, v: &'ll Value) {
+    pub(crate) fn ret(&mut self, v: &'ll Value) {
         unsafe {
             llvm::LLVMBuildRet(self.llbuilder, v);
         }
     }
-}
-impl<'a, 'll> SBuilder<'a, 'll> {
-    fn build(cx: &'a SimpleCx<'ll>, llbb: &'ll BasicBlock) -> SBuilder<'a, 'll> {
-        let bx = SBuilder::with_scx(cx);
+
+    pub(crate) fn build(cx: &'a GenericCx<'ll, CX>, llbb: &'ll BasicBlock) -> Self {
+        let bx = Self::with_cx(cx);
         unsafe {
             llvm::LLVMPositionBuilderAtEnd(bx.llbuilder, llbb);
         }
         bx
     }
-
-    fn check_call<'b>(
-        &mut self,
-        typ: &str,
-        fn_ty: &'ll Type,
-        llfn: &'ll Value,
-        args: &'b [&'ll Value],
-    ) -> Cow<'b, [&'ll Value]> {
-        assert!(
-            self.cx.type_kind(fn_ty) == TypeKind::Function,
-            "builder::{typ} not passed a function, but {fn_ty:?}"
-        );
-
-        let param_tys = self.cx.func_params_types(fn_ty);
-
-        let all_args_match = iter::zip(&param_tys, args.iter().map(|&v| self.cx.val_ty(v)))
-            .all(|(expected_ty, actual_ty)| *expected_ty == actual_ty);
-
-        if all_args_match {
-            return Cow::Borrowed(args);
-        }
-
-        let casted_args: Vec<_> = iter::zip(param_tys, args)
-            .enumerate()
-            .map(|(i, (expected_ty, &actual_val))| {
-                let actual_ty = self.cx.val_ty(actual_val);
-                if expected_ty != actual_ty {
-                    debug!(
-                        "type mismatch in function call of {:?}. \
-                            Expected {:?} for param {}, got {:?}; injecting bitcast",
-                        llfn, expected_ty, i, actual_ty
-                    );
-                    self.bitcast(actual_val, expected_ty)
-                } else {
-                    actual_val
-                }
-            })
-            .collect();
-
-        Cow::Owned(casted_args)
-    }
 }
 
 /// Empty string, to be used where LLVM expects an instruction name, indicating
@@ -165,17 +125,17 @@ impl<'a, 'll> SBuilder<'a, 'll> {
 // FIXME(eddyb) pass `&CStr` directly to FFI once it's a thin pointer.
 const UNNAMED: *const c_char = c"".as_ptr();
 
-impl<'ll, 'tcx> BackendTypes for Builder<'_, 'll, 'tcx> {
-    type Value = <CodegenCx<'ll, 'tcx> as BackendTypes>::Value;
-    type Metadata = <CodegenCx<'ll, 'tcx> as BackendTypes>::Metadata;
-    type Function = <CodegenCx<'ll, 'tcx> as BackendTypes>::Function;
-    type BasicBlock = <CodegenCx<'ll, 'tcx> as BackendTypes>::BasicBlock;
-    type Type = <CodegenCx<'ll, 'tcx> as BackendTypes>::Type;
-    type Funclet = <CodegenCx<'ll, 'tcx> as BackendTypes>::Funclet;
-
-    type DIScope = <CodegenCx<'ll, 'tcx> as BackendTypes>::DIScope;
-    type DILocation = <CodegenCx<'ll, 'tcx> as BackendTypes>::DILocation;
-    type DIVariable = <CodegenCx<'ll, 'tcx> as BackendTypes>::DIVariable;
+impl<'ll, CX: Borrow<SCx<'ll>>> BackendTypes for GenericBuilder<'_, 'll, CX> {
+    type Value = <GenericCx<'ll, CX> as BackendTypes>::Value;
+    type Metadata = <GenericCx<'ll, CX> as BackendTypes>::Metadata;
+    type Function = <GenericCx<'ll, CX> as BackendTypes>::Function;
+    type BasicBlock = <GenericCx<'ll, CX> as BackendTypes>::BasicBlock;
+    type Type = <GenericCx<'ll, CX> as BackendTypes>::Type;
+    type Funclet = <GenericCx<'ll, CX> as BackendTypes>::Funclet;
+
+    type DIScope = <GenericCx<'ll, CX> as BackendTypes>::DIScope;
+    type DILocation = <GenericCx<'ll, CX> as BackendTypes>::DILocation;
+    type DIVariable = <GenericCx<'ll, CX> as BackendTypes>::DIVariable;
 }
 
 impl abi::HasDataLayout for Builder<'_, '_, '_> {
@@ -291,9 +251,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn ret_void(&mut self) {
-        unsafe {
-            llvm::LLVMBuildRetVoid(self.llbuilder);
-        }
+        llvm::LLVMBuildRetVoid(self.llbuilder);
     }
 
     fn ret(&mut self, v: &'ll Value) {
@@ -333,6 +291,50 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         }
     }
 
+    fn switch_with_weights(
+        &mut self,
+        v: Self::Value,
+        else_llbb: Self::BasicBlock,
+        else_is_cold: bool,
+        cases: impl ExactSizeIterator<Item = (u128, Self::BasicBlock, bool)>,
+    ) {
+        if self.cx.sess().opts.optimize == rustc_session::config::OptLevel::No {
+            self.switch(v, else_llbb, cases.map(|(val, dest, _)| (val, dest)));
+            return;
+        }
+
+        let id_str = "branch_weights";
+        let id = unsafe {
+            llvm::LLVMMDStringInContext2(self.cx.llcx, id_str.as_ptr().cast(), id_str.len())
+        };
+
+        // For switch instructions with 2 targets, the `llvm.expect` intrinsic is used.
+        // This function handles switch instructions with more than 2 targets and it needs to
+        // emit branch weights metadata instead of using the intrinsic.
+        // The values 1 and 2000 are the same as the values used by the `llvm.expect` intrinsic.
+        let cold_weight = llvm::LLVMValueAsMetadata(self.cx.const_u32(1));
+        let hot_weight = llvm::LLVMValueAsMetadata(self.cx.const_u32(2000));
+        let weight =
+            |is_cold: bool| -> &Metadata { if is_cold { cold_weight } else { hot_weight } };
+
+        let mut md: SmallVec<[&Metadata; 16]> = SmallVec::with_capacity(cases.len() + 2);
+        md.push(id);
+        md.push(weight(else_is_cold));
+
+        let switch =
+            unsafe { llvm::LLVMBuildSwitch(self.llbuilder, v, else_llbb, cases.len() as c_uint) };
+        for (on_val, dest, is_cold) in cases {
+            let on_val = self.const_uint_big(self.val_ty(v), on_val);
+            unsafe { llvm::LLVMAddCase(switch, on_val, dest) }
+            md.push(weight(is_cold));
+        }
+
+        unsafe {
+            let md_node = llvm::LLVMMDNodeInContext2(self.cx.llcx, md.as_ptr(), md.len() as size_t);
+            self.cx.set_metadata(switch, llvm::MD_prof, md_node);
+        }
+    }
+
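A hypothetical usage sketch of the `switch_with_weights` helper added above (not part of this
patch; `discr` and the basic-block names are invented): with optimizations enabled, the call
attaches `!prof` branch-weight metadata using 1 for cold and 2000 for hot arms, mirroring the
weights `llvm.expect` would produce for a two-target switch.

    bx.switch_with_weights(
        discr,
        otherwise_bb,
        /* else_is_cold */ true,
        [(0u128, ok_bb, false), (1u128, err_bb, true)].into_iter(),
    );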
     fn invoke(
         &mut self,
         llty: &'ll Type,
@@ -359,7 +361,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_deref() {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
             bundles.push(kcfi_bundle);
         }
 
@@ -421,6 +423,37 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unchecked_umul(x, y) => LLVMBuildNUWMul,
     }
 
+    fn unchecked_suadd(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
+        unsafe {
+            let add = llvm::LLVMBuildAdd(self.llbuilder, a, b, UNNAMED);
+            if llvm::LLVMIsAInstruction(add).is_some() {
+                llvm::LLVMSetNUW(add, True);
+                llvm::LLVMSetNSW(add, True);
+            }
+            add
+        }
+    }
+    fn unchecked_susub(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
+        unsafe {
+            let sub = llvm::LLVMBuildSub(self.llbuilder, a, b, UNNAMED);
+            if llvm::LLVMIsAInstruction(sub).is_some() {
+                llvm::LLVMSetNUW(sub, True);
+                llvm::LLVMSetNSW(sub, True);
+            }
+            sub
+        }
+    }
+    fn unchecked_sumul(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
+        unsafe {
+            let mul = llvm::LLVMBuildMul(self.llbuilder, a, b, UNNAMED);
+            if llvm::LLVMIsAInstruction(mul).is_some() {
+                llvm::LLVMSetNUW(mul, True);
+                llvm::LLVMSetNSW(mul, True);
+            }
+            mul
+        }
+    }
+
     fn or_disjoint(&mut self, a: &'ll Value, b: &'ll Value) -> &'ll Value {
         unsafe {
             let or = llvm::LLVMBuildOr(self.llbuilder, a, b, UNNAMED);
@@ -531,7 +564,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
     fn to_immediate_scalar(&mut self, val: Self::Value, scalar: abi::Scalar) -> Self::Value {
         if scalar.is_bool() {
-            return self.trunc(val, self.cx().type_i1());
+            return self.unchecked_utrunc(val, self.cx().type_i1());
         }
         val
     }
@@ -543,7 +576,8 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe {
             let alloca = llvm::LLVMBuildAlloca(bx.llbuilder, ty, UNNAMED);
             llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint);
-            alloca
+            // Cast to default addrspace if necessary
+            llvm::LLVMBuildPointerCast(bx.llbuilder, alloca, self.cx().type_ptr(), UNNAMED)
         }
     }
 
@@ -552,7 +586,8 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             let alloca =
                 llvm::LLVMBuildArrayAlloca(self.llbuilder, self.cx().type_i8(), size, UNNAMED);
             llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint);
-            alloca
+            // Cast to default addrspace if necessary
+            llvm::LLVMBuildPointerCast(self.llbuilder, alloca, self.cx().type_ptr(), UNNAMED)
         }
     }
 
@@ -669,10 +704,12 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
                 let load = self.load(llty, place.val.llval, place.val.align);
                 if let abi::BackendRepr::Scalar(scalar) = place.layout.backend_repr {
                     scalar_load_metadata(self, load, scalar, place.layout, Size::ZERO);
+                    self.to_immediate_scalar(load, scalar)
+                } else {
+                    load
                 }
-                load
             });
-            OperandValue::Immediate(self.to_immediate(llval, place.layout))
+            OperandValue::Immediate(llval)
         } else if let abi::BackendRepr::ScalarPair(a, b) = place.layout.backend_repr {
             let b_offset = a.size(self).align_to(b.align(self).abi);
 
@@ -831,13 +868,14 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
     fn gep(&mut self, ty: &'ll Type, ptr: &'ll Value, indices: &[&'ll Value]) -> &'ll Value {
         unsafe {
-            llvm::LLVMBuildGEP2(
+            llvm::LLVMBuildGEPWithNoWrapFlags(
                 self.llbuilder,
                 ty,
                 ptr,
                 indices.as_ptr(),
                 indices.len() as c_uint,
                 UNNAMED,
+                GEPNoWrapFlags::default(),
             )
         }
     }
@@ -849,13 +887,33 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         indices: &[&'ll Value],
     ) -> &'ll Value {
         unsafe {
-            llvm::LLVMBuildInBoundsGEP2(
+            llvm::LLVMBuildGEPWithNoWrapFlags(
                 self.llbuilder,
                 ty,
                 ptr,
                 indices.as_ptr(),
                 indices.len() as c_uint,
                 UNNAMED,
+                GEPNoWrapFlags::InBounds,
+            )
+        }
+    }
+
+    fn inbounds_nuw_gep(
+        &mut self,
+        ty: &'ll Type,
+        ptr: &'ll Value,
+        indices: &[&'ll Value],
+    ) -> &'ll Value {
+        unsafe {
+            llvm::LLVMBuildGEPWithNoWrapFlags(
+                self.llbuilder,
+                ty,
+                ptr,
+                indices.as_ptr(),
+                indices.len() as c_uint,
+                UNNAMED,
+                GEPNoWrapFlags::InBounds | GEPNoWrapFlags::NUW,
             )
         }
     }
@@ -865,6 +923,34 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe { llvm::LLVMBuildTrunc(self.llbuilder, val, dest_ty, UNNAMED) }
     }
 
+    fn unchecked_utrunc(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        debug_assert_ne!(self.val_ty(val), dest_ty);
+
+        let trunc = self.trunc(val, dest_ty);
+        if llvm_util::get_version() >= (19, 0, 0) {
+            unsafe {
+                if llvm::LLVMIsAInstruction(trunc).is_some() {
+                    llvm::LLVMSetNUW(trunc, True);
+                }
+            }
+        }
+        trunc
+    }
+
+    fn unchecked_strunc(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
+        debug_assert_ne!(self.val_ty(val), dest_ty);
+
+        let trunc = self.trunc(val, dest_ty);
+        if llvm_util::get_version() >= (19, 0, 0) {
+            unsafe {
+                if llvm::LLVMIsAInstruction(trunc).is_some() {
+                    llvm::LLVMSetNSW(trunc, True);
+                }
+            }
+        }
+        trunc
+    }
+
     fn sext(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMBuildSExt(self.llbuilder, val, dest_ty, UNNAMED) }
     }
@@ -989,6 +1075,35 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe { llvm::LLVMBuildFCmp(self.llbuilder, op as c_uint, lhs, rhs, UNNAMED) }
     }
 
+    fn three_way_compare(
+        &mut self,
+        ty: Ty<'tcx>,
+        lhs: Self::Value,
+        rhs: Self::Value,
+    ) -> Option<Self::Value> {
+        // FIXME: See comment on the definition of `three_way_compare`.
+        if crate::llvm_util::get_version() < (20, 0, 0) {
+            return None;
+        }
+
+        let name = match (ty.is_signed(), ty.primitive_size(self.tcx).bits()) {
+            (true, 8) => "llvm.scmp.i8.i8",
+            (true, 16) => "llvm.scmp.i8.i16",
+            (true, 32) => "llvm.scmp.i8.i32",
+            (true, 64) => "llvm.scmp.i8.i64",
+            (true, 128) => "llvm.scmp.i8.i128",
+
+            (false, 8) => "llvm.ucmp.i8.i8",
+            (false, 16) => "llvm.ucmp.i8.i16",
+            (false, 32) => "llvm.ucmp.i8.i32",
+            (false, 64) => "llvm.ucmp.i8.i64",
+            (false, 128) => "llvm.ucmp.i8.i128",
+
+            _ => bug!("three-way compare unsupported for type {ty:?}"),
+        };
+        Some(self.call_intrinsic(name, &[lhs, rhs]))
+    }
+
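The `llvm.scmp.*`/`llvm.ucmp.*` intrinsics selected above return an `i8` in {-1, 0, 1}, which
matches the discriminant values of `core::cmp::Ordering`, so the result can be used directly as
the three-way comparison value. A rough Rust equivalent of the signed case, shown only to
illustrate the semantics (not part of this patch):

    fn scmp(lhs: i64, rhs: i64) -> i8 {
        // -1 if less, 0 if equal, 1 if greater, like `llvm.scmp.i8.i64`.
        if lhs < rhs { -1 } else if lhs > rhs { 1 } else { 0 }
    }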
     /* Miscellaneous instructions */
     fn memcpy(
         &mut self,
@@ -1303,7 +1418,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_deref() {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
             bundles.push(kcfi_bundle);
         }
 
@@ -1346,26 +1461,12 @@ impl<'ll> StaticBuilderMethods for Builder<'_, 'll, '_> {
 }
 
 impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
-    fn build(cx: &'a CodegenCx<'ll, 'tcx>, llbb: &'ll BasicBlock) -> Builder<'a, 'll, 'tcx> {
-        let bx = Builder::with_cx(cx);
-        unsafe {
-            llvm::LLVMPositionBuilderAtEnd(bx.llbuilder, llbb);
-        }
-        bx
-    }
-
-    fn with_cx(cx: &'a CodegenCx<'ll, 'tcx>) -> Self {
-        // Create a fresh builder from the crate context.
-        let llbuilder = unsafe { llvm::LLVMCreateBuilderInContext(cx.llcx) };
-        Builder { llbuilder, cx }
-    }
-
     pub(crate) fn llfn(&self) -> &'ll Value {
         unsafe { llvm::LLVMGetBasicBlockParent(self.llbb()) }
     }
 }
 
-impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
+impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     fn position_at_start(&mut self, llbb: &'ll BasicBlock) {
         unsafe {
             llvm::LLVMRustPositionBuilderAtStart(self.llbuilder, llbb);
@@ -1395,7 +1496,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         }
     }
 }
-impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
+impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     pub(crate) fn minnum(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
         unsafe { llvm::LLVMRustBuildMinNum(self.llbuilder, lhs, rhs) }
     }
@@ -1496,9 +1597,7 @@ impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
         let ret = unsafe { llvm::LLVMBuildCatchRet(self.llbuilder, funclet.cleanuppad(), unwind) };
         ret.expect("LLVM does not have support for catchret")
     }
-}
 
-impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     fn check_call<'b>(
         &mut self,
         typ: &str,
@@ -1513,7 +1612,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         let param_tys = self.cx.func_params_types(fn_ty);
 
-        let all_args_match = iter::zip(&param_tys, args.iter().map(|&v| self.val_ty(v)))
+        let all_args_match = iter::zip(&param_tys, args.iter().map(|&v| self.cx.val_ty(v)))
             .all(|(expected_ty, actual_ty)| *expected_ty == actual_ty);
 
         if all_args_match {
@@ -1523,7 +1622,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         let casted_args: Vec<_> = iter::zip(param_tys, args)
             .enumerate()
             .map(|(i, (expected_ty, &actual_val))| {
-                let actual_ty = self.val_ty(actual_val);
+                let actual_ty = self.cx.val_ty(actual_val);
                 if expected_ty != actual_ty {
                     debug!(
                         "type mismatch in function call of {:?}. \
@@ -1539,12 +1638,12 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         Cow::Owned(casted_args)
     }
-}
-impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
+
     pub(crate) fn va_arg(&mut self, list: &'ll Value, ty: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMBuildVAArg(self.llbuilder, list, ty, UNNAMED) }
     }
 }
+
 impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
     pub(crate) fn call_intrinsic(&mut self, intrinsic: &str, args: &[&'ll Value]) -> &'ll Value {
         let (ty, f) = self.cx.get_intrinsic(intrinsic);
@@ -1564,7 +1663,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         self.call_intrinsic(intrinsic, &[self.cx.const_u64(size), ptr]);
     }
 }
-impl<'a, 'll, CX: Borrow<SimpleCx<'ll>>> GenericBuilder<'a, 'll, CX> {
+impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     pub(crate) fn phi(
         &mut self,
         ty: &'ll Type,
@@ -1652,7 +1751,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
 
         // Emit KCFI operand bundle
         let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
-        if let Some(kcfi_bundle) = kcfi_bundle.as_deref() {
+        if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.raw()) {
             bundles.push(kcfi_bundle);
         }
 
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index 9e8e4e1c567..7d264ba4d00 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -3,29 +3,238 @@ use std::ptr;
 use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode};
 use rustc_codegen_ssa::ModuleCodegen;
 use rustc_codegen_ssa::back::write::ModuleConfig;
+use rustc_codegen_ssa::common::TypeKind;
+use rustc_codegen_ssa::traits::BaseTypeCodegenMethods;
 use rustc_errors::FatalError;
-use rustc_session::config::Lto;
+use rustc_middle::bug;
 use tracing::{debug, trace};
 
-use crate::back::write::{llvm_err, llvm_optimize};
+use crate::back::write::llvm_err;
 use crate::builder::SBuilder;
 use crate::context::SimpleCx;
 use crate::declare::declare_simple_fn;
-use crate::errors::LlvmError;
+use crate::errors::{AutoDiffWithoutEnable, LlvmError};
 use crate::llvm::AttributePlace::Function;
 use crate::llvm::{Metadata, True};
 use crate::value::Value;
 use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, llvm};
 
 fn get_params(fnc: &Value) -> Vec<&Value> {
+    let param_num = llvm::LLVMCountParams(fnc) as usize;
+    let mut fnc_args: Vec<&Value> = vec![];
+    fnc_args.reserve(param_num);
     unsafe {
-        let param_num = llvm::LLVMCountParams(fnc) as usize;
-        let mut fnc_args: Vec<&Value> = vec![];
-        fnc_args.reserve(param_num);
         llvm::LLVMGetParams(fnc, fnc_args.as_mut_ptr());
         fnc_args.set_len(param_num);
-        fnc_args
     }
+    fnc_args
+}
+
+fn has_sret(fnc: &Value) -> bool {
+    let num_args = llvm::LLVMCountParams(fnc) as usize;
+    if num_args == 0 {
+        false
+    } else {
+        unsafe { llvm::LLVMRustHasAttributeAtIndex(fnc, 0, llvm::AttributeKind::StructRet) }
+    }
+}
+
+// When we call the `__enzyme_autodiff` or `__enzyme_fwddiff` function, we need to pass all the
+// original inputs, as well as metadata and the additional shadow arguments.
+// This function matches the arguments from the outer function to the inner enzyme call.
+//
+// This function also considers that Rust level arguments do not always match the llvm-ir level
+// arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on the
+// llvm-ir level. The number of activities matches the number of Rust level arguments, so we
+// need to match those.
+// FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
+// using iterators and peek()?
+fn match_args_from_caller_to_enzyme<'ll>(
+    cx: &SimpleCx<'ll>,
+    width: u32,
+    args: &mut Vec<&'ll llvm::Value>,
+    inputs: &[DiffActivity],
+    outer_args: &[&'ll llvm::Value],
+    has_sret: bool,
+) {
+    debug!("matching autodiff arguments");
+    // We now handle the issue that Rust level arguments do not always match the llvm-ir level
+    // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on the
+    // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
+    // need to match those.
+    // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
+    // using iterators and peek()?
+    let mut outer_pos: usize = 0;
+    let mut activity_pos = 0;
+
+    if has_sret {
+        // Then the first outer arg is the sret pointer. Enzyme doesn't know about sret, so the
+        // inner function will still return something. We increase our outer_pos by one,
+        // and once we're done with all other args we will take the return of the inner call and
+        // update the sret pointer with it.
+        outer_pos = 1;
+    }
+
+    let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
+    let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
+    let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
+    let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
+
+    while activity_pos < inputs.len() {
+        let diff_activity = inputs[activity_pos as usize];
+        // Duplicated arguments received a shadow argument, into which enzyme will write the
+        // gradient.
+        let (activity, duplicated): (&Metadata, bool) = match diff_activity {
+            DiffActivity::None => panic!("not a valid input activity"),
+            DiffActivity::Const => (enzyme_const, false),
+            DiffActivity::Active => (enzyme_out, false),
+            DiffActivity::ActiveOnly => (enzyme_out, false),
+            DiffActivity::Dual => (enzyme_dup, true),
+            DiffActivity::DualOnly => (enzyme_dupnoneed, true),
+            DiffActivity::Duplicated => (enzyme_dup, true),
+            DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
+            DiffActivity::FakeActivitySize => (enzyme_const, false),
+        };
+        let outer_arg = outer_args[outer_pos];
+        args.push(cx.get_metadata_value(activity));
+        args.push(outer_arg);
+        if duplicated {
+            // We know that duplicated args by construction have a following argument,
+            // so this can not be out of bounds.
+            let next_outer_arg = outer_args[outer_pos + 1];
+            let next_outer_ty = cx.val_ty(next_outer_arg);
+            // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
+            // vectors behind references (&Vec<T>) are already supported. Users can not pass a
+            // Vec by value for reverse mode, so this would only help forward mode autodiff.
+            let slice = {
+                if activity_pos + 1 >= inputs.len() {
+                    // If there is no arg following our ptr, it also can't be a slice,
+                    // since that would lead to a ptr, int pair.
+                    false
+                } else {
+                    let next_activity = inputs[activity_pos + 1];
+                    // We analyze the MIR types and add this dummy activity if we visit a slice.
+                    next_activity == DiffActivity::FakeActivitySize
+                }
+            };
+            if slice {
+                // A duplicated slice will have the following two outer_fn arguments:
+                // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
+                // (..., metadata! enzyme_dup, ptr, ptr, int1, ...).
+                // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
+                // int2 >= int1, which means the shadow vector is large enough to store the gradient.
+                assert_eq!(cx.type_kind(next_outer_ty), TypeKind::Integer);
+
+                for i in 0..(width as usize) {
+                    let next_outer_arg2 = outer_args[outer_pos + 2 * (i + 1)];
+                    let next_outer_ty2 = cx.val_ty(next_outer_arg2);
+                    assert_eq!(cx.type_kind(next_outer_ty2), TypeKind::Pointer);
+                    let next_outer_arg3 = outer_args[outer_pos + 2 * (i + 1) + 1];
+                    let next_outer_ty3 = cx.val_ty(next_outer_arg3);
+                    assert_eq!(cx.type_kind(next_outer_ty3), TypeKind::Integer);
+                    args.push(next_outer_arg2);
+                }
+                args.push(cx.get_metadata_value(enzyme_const));
+                args.push(next_outer_arg);
+                outer_pos += 2 + 2 * width as usize;
+                activity_pos += 2;
+            } else {
+                // A duplicated pointer will have the following two outer_fn arguments:
+                // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
+                // (..., metadata! enzyme_dup, ptr, ptr, ...).
+                if matches!(diff_activity, DiffActivity::Duplicated | DiffActivity::DuplicatedOnly)
+                {
+                    assert_eq!(cx.type_kind(next_outer_ty), TypeKind::Pointer);
+                }
+                // In the case of Dual we don't have assumptions, e.g. f32 would be valid.
+                args.push(next_outer_arg);
+                outer_pos += 2;
+                activity_pos += 1;
+
+                // Now, if width > 1, we need to account for that
+                for _ in 1..width {
+                    let next_outer_arg = outer_args[outer_pos];
+                    args.push(next_outer_arg);
+                    outer_pos += 1;
+                }
+            }
+        } else {
+            // We do not differentiate with respect to this argument.
+            // We already added the metadata and argument above, so just increase the counters.
+            outer_pos += 1;
+            activity_pos += 1;
+        }
+    }
+}
+
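As an illustration of the slice branch above, a Rust-level sketch of code that produces the
(ptr, len, shadow ptr, shadow len) argument pattern (illustrative only; `sum`/`d_sum` are
invented names and the exact nightly attribute syntax may differ):

    #![feature(autodiff)]
    use std::autodiff::autodiff;

    #[autodiff(d_sum, Reverse, Duplicated, Active)]
    fn sum(x: &[f32]) -> f32 {
        x.iter().sum()
    }

Here the activity list is [Duplicated, FakeActivitySize]: `x` lowers to a pointer plus a length,
the shadow slice contributes a second pointer plus length, and the `__enzyme_autodiff` call ends
up receiving roughly (enzyme_dup, ptr1, ptr2, enzyme_const, int1), as handled in the slice case
above.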
+// On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input
+// arguments. We do however need to declare them with their correct return type.
+// We already figured the correct return type out in our frontend, when generating the outer_fn,
+// so we can now just go ahead and use that. This is not always trivial, e.g. because of sret.
+// Beyond sret, this article describes our challenges nicely:
+// <https://yorickpeterse.com/articles/the-mess-that-is-handling-structure-arguments-and-returns-in-llvm/>
+// I.e. (i32, f32) will get merged into i64, but we don't handle that yet.
+fn compute_enzyme_fn_ty<'ll>(
+    cx: &SimpleCx<'ll>,
+    attrs: &AutoDiffAttrs,
+    fn_to_diff: &'ll Value,
+    outer_fn: &'ll Value,
+) -> &'ll llvm::Type {
+    let fn_ty = cx.get_type_of_global(outer_fn);
+    let mut ret_ty = cx.get_return_type(fn_ty);
+
+    let has_sret = has_sret(outer_fn);
+
+    if has_sret {
+        // Now we don't just forward the return type, so we have to figure it out based on the
+        // primal return type, in combination with the autodiff settings.
+        let fn_ty = cx.get_type_of_global(fn_to_diff);
+        let inner_ret_ty = cx.get_return_type(fn_ty);
+
+        let void_ty = unsafe { llvm::LLVMVoidTypeInContext(cx.llcx) };
+        if inner_ret_ty == void_ty {
+            // This indicates that even the inner function has an sret.
+            // Right now I only look for an sret in the outer function.
+            // This *probably* needs some extra handling, but I never ran
+            // into such a case. So I'll wait for user reports to have a test case.
+            bug!("sret in inner function");
+        }
+
+        if attrs.width == 1 {
+            todo!("Handle sret for scalar ad");
+        } else {
+            // First we check if we also have to deal with the primal return.
+            match attrs.mode {
+                DiffMode::Forward => match attrs.ret_activity {
+                    DiffActivity::Dual => {
+                        let arr_ty =
+                            unsafe { llvm::LLVMArrayType2(inner_ret_ty, attrs.width as u64 + 1) };
+                        ret_ty = arr_ty;
+                    }
+                    DiffActivity::DualOnly => {
+                        let arr_ty =
+                            unsafe { llvm::LLVMArrayType2(inner_ret_ty, attrs.width as u64) };
+                        ret_ty = arr_ty;
+                    }
+                    DiffActivity::Const => {
+                        todo!("Not sure, do we need to do something here?");
+                    }
+                    _ => {
+                        bug!("unreachable");
+                    }
+                },
+                DiffMode::Reverse => {
+                    todo!("Handle sret for reverse mode");
+                }
+                _ => {
+                    bug!("unreachable");
+                }
+            }
+        }
+    }
+
+    // LLVM can figure out the input types on its own, so we take a shortcut here.
+    unsafe { llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True) }
 }
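A minimal sketch (not part of this patch) of the width > 1 return-type rule implemented above for
forward mode: `Dual` keeps the primal value plus `width` tangents, `DualOnly` keeps only the
tangents. For example, a primal returning `f64` with width 4 gives `[4 x double]` for `DualOnly`
and `[5 x double]` for `Dual`, which is what later gets stored through the sret pointer.

    use rustc_ast::expand::autodiff_attrs::DiffActivity;

    fn fwd_ret_elems(width: u64, ret_activity: DiffActivity) -> u64 {
        match ret_activity {
            // Primal return plus one tangent per vector lane.
            DiffActivity::Dual => width + 1,
            // Tangents only, no primal return.
            DiffActivity::DualOnly => width,
            _ => unreachable!("other return activities are not handled in this sketch"),
        }
    }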
 
 /// When differentiating `fn_to_diff`, take a `outer_fn` and generate another
@@ -43,18 +252,10 @@ fn generate_enzyme_call<'ll>(
     outer_fn: &'ll Value,
     attrs: AutoDiffAttrs,
 ) {
-    let inputs = attrs.input_activity;
-    let output = attrs.ret_activity;
-
     // We have to pick the name depending on whether we want forward or reverse mode autodiff.
-    // FIXME(ZuseZ4): The new pass based approach should not need the {Forward/Reverse}First method anymore, since
-    // it will handle higher-order derivatives correctly automatically (in theory). Currently
-    // higher-order derivatives fail, so we should debug that before adjusting this code.
     let mut ad_name: String = match attrs.mode {
         DiffMode::Forward => "__enzyme_fwddiff",
         DiffMode::Reverse => "__enzyme_autodiff",
-        DiffMode::ForwardFirst => "__enzyme_fwddiff",
-        DiffMode::ReverseFirst => "__enzyme_autodiff",
         _ => panic!("logic bug in autodiff, unrecognized mode"),
     }
     .to_string();
@@ -98,17 +299,9 @@ fn generate_enzyme_call<'ll>(
     // }
     // ```
     unsafe {
-        // On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input
-        // arguments. We do however need to declare them with their correct return type.
-        // We already figured the correct return type out in our frontend, when generating the outer_fn,
-        // so we can now just go ahead and use that. FIXME(ZuseZ4): This doesn't handle sret yet.
-        let fn_ty = llvm::LLVMGlobalGetValueType(outer_fn);
-        let ret_ty = llvm::LLVMGetReturnType(fn_ty);
+        let enzyme_ty = compute_enzyme_fn_ty(cx, &attrs, fn_to_diff, outer_fn);
 
-        // LLVM can figure out the input types on it's own, so we take a shortcut here.
-        let enzyme_ty = llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True);
-
-        //FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
+        // FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and
         // think a bit more about what should go here.
         let cc = llvm::LLVMGetFunctionCallConv(outer_fn);
         let ad_fn = declare_simple_fn(
@@ -137,108 +330,31 @@ fn generate_enzyme_call<'ll>(
         let mut args = Vec::with_capacity(num_args as usize + 1);
         args.push(fn_to_diff);
 
-        let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
-        let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
-        let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
-        let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
         let enzyme_primal_ret = cx.create_metadata("enzyme_primal_return".to_string()).unwrap();
-
-        match output {
-            DiffActivity::Dual => {
-                args.push(cx.get_metadata_value(enzyme_primal_ret));
-            }
-            DiffActivity::Active => {
-                args.push(cx.get_metadata_value(enzyme_primal_ret));
-            }
-            _ => {}
+        if matches!(attrs.ret_activity, DiffActivity::Dual | DiffActivity::Active) {
+            args.push(cx.get_metadata_value(enzyme_primal_ret));
+        }
+        if attrs.width > 1 {
+            let enzyme_width = cx.create_metadata("enzyme_width".to_string()).unwrap();
+            args.push(cx.get_metadata_value(enzyme_width));
+            args.push(cx.get_const_i64(attrs.width as u64));
         }
 
-        trace!("matching autodiff arguments");
-        // We now handle the issue that Rust level arguments not always match the llvm-ir level
-        // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
-        // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
-        // need to match those.
-        // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
-        // using iterators and peek()?
-        let mut outer_pos: usize = 0;
-        let mut activity_pos = 0;
+        let has_sret = has_sret(outer_fn);
         let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
-        while activity_pos < inputs.len() {
-            let activity = inputs[activity_pos as usize];
-            // Duplicated arguments received a shadow argument, into which enzyme will write the
-            // gradient.
-            let (activity, duplicated): (&Metadata, bool) = match activity {
-                DiffActivity::None => panic!("not a valid input activity"),
-                DiffActivity::Const => (enzyme_const, false),
-                DiffActivity::Active => (enzyme_out, false),
-                DiffActivity::ActiveOnly => (enzyme_out, false),
-                DiffActivity::Dual => (enzyme_dup, true),
-                DiffActivity::DualOnly => (enzyme_dupnoneed, true),
-                DiffActivity::Duplicated => (enzyme_dup, true),
-                DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
-                DiffActivity::FakeActivitySize => (enzyme_const, false),
-            };
-            let outer_arg = outer_args[outer_pos];
-            args.push(cx.get_metadata_value(activity));
-            args.push(outer_arg);
-            if duplicated {
-                // We know that duplicated args by construction have a following argument,
-                // so this can not be out of bounds.
-                let next_outer_arg = outer_args[outer_pos + 1];
-                let next_outer_ty = cx.val_ty(next_outer_arg);
-                // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
-                // vectors behind references (&Vec<T>) are already supported. Users can not pass a
-                // Vec by value for reverse mode, so this would only help forward mode autodiff.
-                let slice = {
-                    if activity_pos + 1 >= inputs.len() {
-                        // If there is no arg following our ptr, it also can't be a slice,
-                        // since that would lead to a ptr, int pair.
-                        false
-                    } else {
-                        let next_activity = inputs[activity_pos + 1];
-                        // We analyze the MIR types and add this dummy activity if we visit a slice.
-                        next_activity == DiffActivity::FakeActivitySize
-                    }
-                };
-                if slice {
-                    // A duplicated slice will have the following two outer_fn arguments:
-                    // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
-                    // (..., metadata! enzyme_dup, ptr, ptr, int1, ...).
-                    // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
-                    // int2 >= int1, which means the shadow vector is large enough to store the gradient.
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer);
-                    let next_outer_arg2 = outer_args[outer_pos + 2];
-                    let next_outer_ty2 = cx.val_ty(next_outer_arg2);
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer);
-                    let next_outer_arg3 = outer_args[outer_pos + 3];
-                    let next_outer_ty3 = cx.val_ty(next_outer_arg3);
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer);
-                    args.push(next_outer_arg2);
-                    args.push(cx.get_metadata_value(enzyme_const));
-                    args.push(next_outer_arg);
-                    outer_pos += 4;
-                    activity_pos += 2;
-                } else {
-                    // A duplicated pointer will have the following two outer_fn arguments:
-                    // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
-                    // (..., metadata! enzyme_dup, ptr, ptr, ...).
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer);
-                    args.push(next_outer_arg);
-                    outer_pos += 2;
-                    activity_pos += 1;
-                }
-            } else {
-                // We do not differentiate with resprect to this argument.
-                // We already added the metadata and argument above, so just increase the counters.
-                outer_pos += 1;
-                activity_pos += 1;
-            }
-        }
+        match_args_from_caller_to_enzyme(
+            &cx,
+            attrs.width,
+            &mut args,
+            &attrs.input_activity,
+            &outer_args,
+            has_sret,
+        );
 
         let call = builder.call(enzyme_ty, ad_fn, &args, None);
 
         // This part is a bit iffy. LLVM requires that a call to an inlineable function has some
-        // metadata attachted to it, but we just created this code oota. Given that the
+        // metadata attached to it, but we just created this code oota. Given that the
         // differentiated function already has partly confusing metadata, and given that this
         // affects nothing but the autodiff IR, we take a shortcut and just steal metadata from the
         // dummy code which we inserted at a higher level.
@@ -259,7 +375,22 @@ fn generate_enzyme_call<'ll>(
         // Now that we copied the metadata, get rid of dummy code.
         llvm::LLVMRustEraseInstUntilInclusive(entry, last_inst);
 
-        if cx.val_ty(call) == cx.type_void() {
+        if cx.val_ty(call) == cx.type_void() || has_sret {
+            if has_sret {
+                // This is what we already have in our outer_fn (shortened):
+                // define void @_foo(ptr <..> sret([32 x i8]) initializes((0, 32)) %0, <...>) {
+                //   %7 = call [4 x double] (...) @__enzyme_fwddiff_foo(ptr @square, metadata !"enzyme_width", i64 4, <...>)
+                //   <Here we are, we want to add the following two lines>
+                //   store [4 x double] %7, ptr %0, align 8
+                //   ret void
+                // }
+
+                // now store the result of the enzyme call into the sret pointer.
+                let sret_ptr = outer_args[0];
+                let call_ty = cx.val_ty(call);
+                assert_eq!(cx.type_kind(call_ty), TypeKind::Array);
+                llvm::LLVMBuildStore(&builder.llbuilder, call, sret_ptr);
+            }
             builder.ret_void();
         } else {
             builder.ret(call);
@@ -277,34 +408,48 @@ pub(crate) fn differentiate<'ll>(
     module: &'ll ModuleCodegen<ModuleLlvm>,
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     diff_items: Vec<AutoDiffItem>,
-    config: &ModuleConfig,
+    _config: &ModuleConfig,
 ) -> Result<(), FatalError> {
     for item in &diff_items {
         trace!("{}", item);
     }
 
     let diag_handler = cgcx.create_dcx();
-    let cx = SimpleCx { llmod: module.module_llvm.llmod(), llcx: module.module_llvm.llcx };
+
+    let cx = SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
+
+    // First of all, did the user try to use autodiff without using the -Zautodiff=Enable flag?
+    if !diff_items.is_empty()
+        && !cgcx.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable)
+    {
+        return Err(diag_handler.handle().emit_almost_fatal(AutoDiffWithoutEnable));
+    }
 
     // Before dumping the module, we want all the TypeTrees to become part of the module.
     for item in diff_items.iter() {
         let name = item.source.clone();
         let fn_def: Option<&llvm::Value> = cx.get_function(&name);
         let Some(fn_def) = fn_def else {
-            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
-                src: item.source.clone(),
-                target: item.target.clone(),
-                error: "could not find source function".to_owned(),
-            }));
+            return Err(llvm_err(
+                diag_handler.handle(),
+                LlvmError::PrepareAutoDiff {
+                    src: item.source.clone(),
+                    target: item.target.clone(),
+                    error: "could not find source function".to_owned(),
+                },
+            ));
         };
         debug!(?item.target);
         let fn_target: Option<&llvm::Value> = cx.get_function(&item.target);
         let Some(fn_target) = fn_target else {
-            return Err(llvm_err(diag_handler.handle(), LlvmError::PrepareAutoDiff {
-                src: item.source.clone(),
-                target: item.target.clone(),
-                error: "could not find target function".to_owned(),
-            }));
+            return Err(llvm_err(
+                diag_handler.handle(),
+                LlvmError::PrepareAutoDiff {
+                    src: item.source.clone(),
+                    target: item.target.clone(),
+                    error: "could not find target function".to_owned(),
+                },
+            ));
         };
 
         generate_enzyme_call(&cx, fn_def, fn_target, item.attrs.clone());
@@ -312,29 +457,6 @@ pub(crate) fn differentiate<'ll>(
 
     // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
 
-    if let Some(opt_level) = config.opt_level {
-        let opt_stage = match cgcx.lto {
-            Lto::Fat => llvm::OptStage::PreLinkFatLTO,
-            Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
-            _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
-            _ => llvm::OptStage::PreLinkNoLTO,
-        };
-        // This is our second opt call, so now we run all opts,
-        // to make sure we get the best performance.
-        let skip_size_increasing_opts = false;
-        trace!("running Module Optimization after differentiation");
-        unsafe {
-            llvm_optimize(
-                cgcx,
-                diag_handler.handle(),
-                module,
-                config,
-                opt_level,
-                opt_stage,
-                skip_size_increasing_opts,
-            )?
-        };
-    }
     trace!("done with differentiate()");
 
     Ok(())
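
For orientation: the inline activity matching removed above now lives in `match_args_from_caller_to_enzyme`. A minimal sketch of the mapping it performs, mirroring the deleted `match` (an illustrative standalone helper, not the compiler's actual code; assumes `DiffActivity` from `rustc_ast::expand::autodiff_attrs` is in scope):

    // Maps a Rust-level activity to Enzyme's metadata marker string and to whether
    // the argument is followed by a shadow argument (mirrors the removed match above).
    fn enzyme_marker(activity: DiffActivity) -> (&'static str, bool) {
        match activity {
            DiffActivity::Const | DiffActivity::FakeActivitySize => ("enzyme_const", false),
            DiffActivity::Active | DiffActivity::ActiveOnly => ("enzyme_out", false),
            DiffActivity::Dual | DiffActivity::Duplicated => ("enzyme_dup", true),
            DiffActivity::DualOnly | DiffActivity::DuplicatedOnly => ("enzyme_dupnoneed", true),
            DiffActivity::None => panic!("not a valid input activity"),
        }
    }
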
diff --git a/compiler/rustc_codegen_llvm/src/callee.rs b/compiler/rustc_codegen_llvm/src/callee.rs
index aa9a0f34f55..ea9ab5c02bd 100644
--- a/compiler/rustc_codegen_llvm/src/callee.rs
+++ b/compiler/rustc_codegen_llvm/src/callee.rs
@@ -66,9 +66,7 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t
             // LLVM will prefix the name with `__imp_`. Ideally, we'd like the
             // existing logic below to set the Storage Class, but it has an
             // exemption for MinGW for backwards compatibility.
-            unsafe {
-                llvm::LLVMSetDLLStorageClass(llfn, llvm::DLLStorageClass::DllImport);
-            }
+            llvm::set_dllimport_storage_class(llfn);
             llfn
         } else {
             cx.declare_fn(sym, fn_abi, Some(instance))
@@ -99,65 +97,61 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t
         // has been applied to the definition (wherever that definition may be).
 
         llvm::set_linkage(llfn, llvm::Linkage::ExternalLinkage);
-        unsafe {
-            let is_generic = instance.args.non_erasable_generics().next().is_some();
-
-            let is_hidden = if is_generic {
-                // This is a monomorphization of a generic function.
-                if !(cx.tcx.sess.opts.share_generics()
-                    || tcx.codegen_fn_attrs(instance_def_id).inline
-                        == rustc_attr_parsing::InlineAttr::Never)
-                {
-                    // When not sharing generics, all instances are in the same
-                    // crate and have hidden visibility.
-                    true
-                } else {
-                    if let Some(instance_def_id) = instance_def_id.as_local() {
-                        // This is a monomorphization of a generic function
-                        // defined in the current crate. It is hidden if:
-                        // - the definition is unreachable for downstream
-                        //   crates, or
-                        // - the current crate does not re-export generics
-                        //   (because the crate is a C library or executable)
-                        cx.tcx.is_unreachable_local_definition(instance_def_id)
-                            || !cx.tcx.local_crate_exports_generics()
-                    } else {
-                        // This is a monomorphization of a generic function
-                        // defined in an upstream crate. It is hidden if:
-                        // - it is instantiated in this crate, and
-                        // - the current crate does not re-export generics
-                        instance.upstream_monomorphization(tcx).is_none()
-                            && !cx.tcx.local_crate_exports_generics()
-                    }
-                }
-            } else {
-                // This is a non-generic function. It is hidden if:
-                // - it is instantiated in the local crate, and
-                //   - it is defined an upstream crate (non-local), or
-                //   - it is not reachable
-                cx.tcx.is_codegened_item(instance_def_id)
-                    && (!instance_def_id.is_local()
-                        || !cx.tcx.is_reachable_non_generic(instance_def_id))
-            };
-            if is_hidden {
-                llvm::set_visibility(llfn, llvm::Visibility::Hidden);
-            }
+        let is_generic = instance.args.non_erasable_generics().next().is_some();
 
-            // MinGW: For backward compatibility we rely on the linker to decide whether it
-            // should use dllimport for functions.
-            if cx.use_dll_storage_attrs
-                && let Some(library) = tcx.native_library(instance_def_id)
-                && library.kind.is_dllimport()
-                && !matches!(tcx.sess.target.env.as_ref(), "gnu" | "uclibc")
+        let is_hidden = if is_generic {
+            // This is a monomorphization of a generic function.
+            if !(cx.tcx.sess.opts.share_generics()
+                || tcx.codegen_fn_attrs(instance_def_id).inline
+                    == rustc_attr_parsing::InlineAttr::Never)
             {
-                llvm::LLVMSetDLLStorageClass(llfn, llvm::DLLStorageClass::DllImport);
+                // When not sharing generics, all instances are in the same
+                // crate and have hidden visibility.
+                true
+            } else {
+                if let Some(instance_def_id) = instance_def_id.as_local() {
+                    // This is a monomorphization of a generic function
+                    // defined in the current crate. It is hidden if:
+                    // - the definition is unreachable for downstream
+                    //   crates, or
+                    // - the current crate does not re-export generics
+                    //   (because the crate is a C library or executable)
+                    cx.tcx.is_unreachable_local_definition(instance_def_id)
+                        || !cx.tcx.local_crate_exports_generics()
+                } else {
+                    // This is a monomorphization of a generic function
+                    // defined in an upstream crate. It is hidden if:
+                    // - it is instantiated in this crate, and
+                    // - the current crate does not re-export generics
+                    instance.upstream_monomorphization(tcx).is_none()
+                        && !cx.tcx.local_crate_exports_generics()
+                }
             }
+        } else {
+            // This is a non-generic function. It is hidden if:
+            // - it is instantiated in the local crate, and
+            //   - it is defined in an upstream crate (non-local), or
+            //   - it is not reachable
+            cx.tcx.is_codegened_item(instance_def_id)
+                && (!instance_def_id.is_local()
+                    || !cx.tcx.is_reachable_non_generic(instance_def_id))
+        };
+        if is_hidden {
+            llvm::set_visibility(llfn, llvm::Visibility::Hidden);
+        }
 
-            if cx.should_assume_dso_local(llfn, true) {
-                llvm::LLVMRustSetDSOLocal(llfn, true);
-            }
+        // MinGW: For backward compatibility we rely on the linker to decide whether it
+        // should use dllimport for functions.
+        if cx.use_dll_storage_attrs
+            && let Some(library) = tcx.native_library(instance_def_id)
+            && library.kind.is_dllimport()
+            && !matches!(tcx.sess.target.env.as_ref(), "gnu" | "uclibc")
+        {
+            llvm::set_dllimport_storage_class(llfn);
         }
 
+        cx.assume_dso_local(llfn, true);
+
         llfn
     };
 
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index 8c94a46ebf3..457e5452ce9 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -1,5 +1,7 @@
 //! Code that is useful in various codegen modules.
 
+use std::borrow::Borrow;
+
 use libc::{c_char, c_uint};
 use rustc_abi as abi;
 use rustc_abi::Primitive::Pointer;
@@ -7,7 +9,8 @@ use rustc_abi::{AddressSpace, HasDataLayout};
 use rustc_ast::Mutability;
 use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::traits::*;
-use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
+use rustc_hashes::Hash128;
 use rustc_hir::def_id::DefId;
 use rustc_middle::bug;
 use rustc_middle::mir::interpret::{ConstAllocation, GlobalAlloc, Scalar};
@@ -17,6 +20,7 @@ use tracing::debug;
 
 use crate::consts::const_alloc_to_llvm;
 pub(crate) use crate::context::CodegenCx;
+use crate::context::{GenericCx, SCx};
 use crate::llvm::{self, BasicBlock, Bool, ConstantInt, False, Metadata, True};
 use crate::type_::Type;
 use crate::value::Value;
@@ -76,11 +80,11 @@ impl<'ll> Funclet<'ll> {
     }
 
     pub(crate) fn bundle(&self) -> &llvm::OperandBundle<'ll> {
-        &self.operand
+        self.operand.raw()
     }
 }
 
-impl<'ll> BackendTypes for CodegenCx<'ll, '_> {
+impl<'ll, CX: Borrow<SCx<'ll>>> BackendTypes for GenericCx<'ll, CX> {
     type Value = &'ll Value;
     type Metadata = &'ll Metadata;
     // FIXME(eddyb) replace this with a `Function` "subclass" of `Value`.
@@ -117,7 +121,7 @@ impl<'ll> CodegenCx<'ll, '_> {
     }
 }
 
-impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
+impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
     fn const_null(&self, t: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMConstNull(t) }
     }
@@ -126,10 +130,6 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         unsafe { llvm::LLVMGetUndef(t) }
     }
 
-    fn is_undef(&self, v: &'ll Value) -> bool {
-        unsafe { llvm::LLVMIsUndef(v) == True }
-    }
-
     fn const_poison(&self, t: &'ll Type) -> &'ll Value {
         unsafe { llvm::LLVMGetPoison(t) }
     }
@@ -208,28 +208,24 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     }
 
     fn const_str(&self, s: &str) -> (&'ll Value, &'ll Value) {
-        let str_global = *self
-            .const_str_cache
-            .borrow_mut()
-            .raw_entry_mut()
-            .from_key(s)
-            .or_insert_with(|| {
-                let sc = self.const_bytes(s.as_bytes());
-                let sym = self.generate_local_symbol_name("str");
-                let g = self.define_global(&sym, self.val_ty(sc)).unwrap_or_else(|| {
-                    bug!("symbol `{}` is already defined", sym);
-                });
-                unsafe {
-                    llvm::LLVMSetInitializer(g, sc);
-                    llvm::LLVMSetGlobalConstant(g, True);
-                    llvm::LLVMSetUnnamedAddress(g, llvm::UnnamedAddr::Global);
-                }
-                llvm::set_linkage(g, llvm::Linkage::InternalLinkage);
-                // Cast to default address space if globals are in a different addrspace
-                let g = self.const_pointercast(g, self.type_ptr());
-                (s.to_owned(), g)
-            })
-            .1;
+        let mut const_str_cache = self.const_str_cache.borrow_mut();
+        let str_global = const_str_cache.get(s).copied().unwrap_or_else(|| {
+            let sc = self.const_bytes(s.as_bytes());
+            let sym = self.generate_local_symbol_name("str");
+            let g = self.define_global(&sym, self.val_ty(sc)).unwrap_or_else(|| {
+                bug!("symbol `{}` is already defined", sym);
+            });
+            llvm::set_initializer(g, sc);
+            unsafe {
+                llvm::LLVMSetGlobalConstant(g, True);
+                llvm::LLVMSetUnnamedAddress(g, llvm::UnnamedAddr::Global);
+            }
+            llvm::set_linkage(g, llvm::Linkage::InternalLinkage);
+            // Cast to default address space if globals are in a different addrspace
+            let g = self.const_pointercast(g, self.type_ptr());
+            const_str_cache.insert(s.to_owned(), g);
+            g
+        });
         let len = s.len();
         (str_global, self.const_usize(len as u64))
     }
@@ -349,7 +345,7 @@ impl<'ll, 'tcx> ConstCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         }
     }
 
-    fn const_data_from_alloc(&self, alloc: ConstAllocation<'tcx>) -> Self::Value {
+    fn const_data_from_alloc(&self, alloc: ConstAllocation<'_>) -> Self::Value {
         const_alloc_to_llvm(self, alloc, /*static*/ false)
     }
 
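
The `const_str` change above drops the unstable `raw_entry_mut` API in favour of a plain lookup followed by an insert, so the owned `String` key is only allocated on a cache miss. A rough standalone sketch of that pattern (hypothetical names, not compiler code):

    use std::collections::HashMap;

    // Probe the cache by `&str`; allocate the owned key only when inserting.
    fn intern(cache: &mut HashMap<String, u32>, s: &str, make: impl FnOnce() -> u32) -> u32 {
        if let Some(&v) = cache.get(s) {
            return v;
        }
        let v = make();
        cache.insert(s.to_owned(), v);
        v
    }
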
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index c6855dd42e5..bf81eb648f8 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -5,6 +5,7 @@ use rustc_abi::{
 };
 use rustc_codegen_ssa::common;
 use rustc_codegen_ssa::traits::*;
+use rustc_hir::LangItem;
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::DefId;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
@@ -12,11 +13,10 @@ use rustc_middle::mir::interpret::{
     Allocation, ConstAllocation, ErrorHandled, InitChunk, Pointer, Scalar as InterpScalar,
     read_target_uint,
 };
-use rustc_middle::mir::mono::MonoItem;
-use rustc_middle::ty::Instance;
+use rustc_middle::mir::mono::{Linkage, MonoItem};
 use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf};
+use rustc_middle::ty::{self, Instance};
 use rustc_middle::{bug, span_bug};
-use rustc_session::config::Lto;
 use tracing::{debug, instrument, trace};
 
 use crate::common::{AsCCharPtr, CodegenCx};
@@ -129,7 +129,12 @@ pub(crate) fn const_alloc_to_llvm<'ll>(
         append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, range);
     }
 
-    cx.const_struct(&llvals, true)
+    // Avoid wrapping in a struct if there is only a single value. This ensures
+    // that LLVM is able to perform the string merging optimization if the constant
+    // is a valid C string. LLVM only considers bare arrays for this optimization,
+    // not arrays wrapped in a struct. LLVM handles this at:
+    // https://github.com/rust-lang/llvm-project/blob/acaea3d2bb8f351b740db7ebce7d7a40b9e21488/llvm/lib/Target/TargetLoweringObjectFile.cpp#L249-L280
+    if let &[data] = &*llvals { data } else { cx.const_struct(&llvals, true) }
 }
 
 fn codegen_static_initializer<'ll, 'tcx>(
@@ -172,8 +177,27 @@ fn check_and_apply_linkage<'ll, 'tcx>(
     if let Some(linkage) = attrs.import_linkage {
         debug!("get_static: sym={} linkage={:?}", sym, linkage);
 
-        // Declare a symbol `foo` with the desired linkage.
-        let g1 = cx.declare_global(sym, cx.type_i8());
+        // Declare a symbol `foo`. If `foo` is an extern_weak symbol, we declare
+        // an extern_weak function, otherwise a global with the desired linkage.
+        let g1 = if matches!(attrs.import_linkage, Some(Linkage::ExternalWeak)) {
+            // An `extern_weak` function is represented as an `Option<unsafe extern ...>`,
+            // so we extract the function signature and declare it as an extern_weak function
+            // instead of an extern_weak i8.
+            let instance = Instance::mono(cx.tcx, def_id);
+            if let ty::Adt(struct_def, args) = instance.ty(cx.tcx, cx.typing_env()).kind()
+                && cx.tcx.is_lang_item(struct_def.did(), LangItem::Option)
+                && let ty::FnPtr(sig, header) = args.type_at(0).kind()
+            {
+                let fn_sig = sig.with(*header);
+
+                let fn_abi = cx.fn_abi_of_fn_ptr(fn_sig, ty::List::empty());
+                cx.declare_fn(sym, &fn_abi, None)
+            } else {
+                cx.declare_global(sym, cx.type_i8())
+            }
+        } else {
+            cx.declare_global(sym, cx.type_i8())
+        };
         llvm::set_linkage(g1, base::linkage_to_llvm(linkage));
 
         // Declare an internal global `extern_with_linkage_foo` which
@@ -191,7 +215,7 @@ fn check_and_apply_linkage<'ll, 'tcx>(
             })
         });
         llvm::set_linkage(g2, llvm::Linkage::InternalLinkage);
-        unsafe { llvm::LLVMSetInitializer(g2, g1) };
+        llvm::set_initializer(g2, g1);
         g2
     } else if cx.tcx.sess.target.arch == "x86"
         && common::is_mingw_gnu_toolchain(&cx.tcx.sess.target)
@@ -235,7 +259,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             }
             _ => self.define_private_global(self.val_ty(cv)),
         };
-        unsafe { llvm::LLVMSetInitializer(gv, cv) };
+        llvm::set_initializer(gv, cv);
         set_global_alignment(self, gv, align);
         llvm::SetUnnamedAddress(gv, llvm::UnnamedAddr::Global);
         gv
@@ -336,12 +360,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             llvm::set_thread_local_mode(g, self.tls_model);
         }
 
-        let dso_local = self.should_assume_dso_local(g, true);
-        if dso_local {
-            unsafe {
-                llvm::LLVMRustSetDSOLocal(g, true);
-            }
-        }
+        let dso_local = self.assume_dso_local(g, true);
 
         if !def_id.is_local() {
             let needs_dll_storage_attr = self.use_dll_storage_attrs
@@ -349,11 +368,11 @@ impl<'ll> CodegenCx<'ll, '_> {
                 // Local definitions can never be imported, so we must not apply
                 // the DLLImport annotation.
                 && !dso_local
-                // ThinLTO can't handle this workaround in all cases, so we don't
-                // emit the attrs. Instead we make them unnecessary by disallowing
-                // dynamic linking when linker plugin based LTO is enabled.
-                && !self.tcx.sess.opts.cg.linker_plugin_lto.enabled()
-                && self.tcx.sess.lto() != Lto::Thin;
+                // Linker plugin ThinLTO doesn't create the self-dllimport Rust uses for rlibs
+                // as the code generation happens out of process. Instead we assume static linkage
+                // and disallow dynamic linking when linker plugin based LTO is enabled.
+                // Regular in-process ThinLTO doesn't need this workaround.
+                && !self.tcx.sess.opts.cg.linker_plugin_lto.enabled();
 
             // If this assertion triggers, there's something wrong with commandline
             // argument validation.
@@ -375,9 +394,7 @@ impl<'ll> CodegenCx<'ll, '_> {
                 // is actually present in the current crate. We can find out via the
                 // is_codegened_item query.
                 if !self.tcx.is_codegened_item(def_id) {
-                    unsafe {
-                        llvm::LLVMSetDLLStorageClass(g, llvm::DLLStorageClass::DllImport);
-                    }
+                    llvm::set_dllimport_storage_class(g);
                 }
             }
         }
@@ -387,9 +404,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             && library.kind.is_dllimport()
         {
             // For foreign (native) libs we know the exact storage type to use.
-            unsafe {
-                llvm::LLVMSetDLLStorageClass(g, llvm::DLLStorageClass::DllImport);
-            }
+            llvm::set_dllimport_storage_class(g);
         }
 
         self.instances.borrow_mut().insert(instance, g);
@@ -415,7 +430,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             let val_llty = self.val_ty(v);
 
             let g = self.get_static_inner(def_id, val_llty);
-            let llty = llvm::LLVMGlobalGetValueType(g);
+            let llty = self.get_type_of_global(g);
 
             let g = if val_llty == llty {
                 g
@@ -458,11 +473,9 @@ impl<'ll> CodegenCx<'ll, '_> {
                 new_g
             };
             set_global_alignment(self, g, alloc.align);
-            llvm::LLVMSetInitializer(g, v);
+            llvm::set_initializer(g, v);
 
-            if self.should_assume_dso_local(g, true) {
-                llvm::LLVMRustSetDSOLocal(g, true);
-            }
+            self.assume_dso_local(g, true);
 
             // Forward the allocation's mutability (picked by the const interner) to LLVM.
             if alloc.mutability.is_not() {
@@ -501,7 +514,7 @@ impl<'ll> CodegenCx<'ll, '_> {
                         llvm::LLVMMDStringInContext2(self.llcx, bytes.as_c_char_ptr(), bytes.len());
                     let data = [section, alloc];
                     let meta = llvm::LLVMMDNodeInContext2(self.llcx, data.as_ptr(), data.len());
-                    let val = llvm::LLVMMetadataAsValue(self.llcx, meta);
+                    let val = self.get_metadata_value(meta);
                     llvm::LLVMAddNamedMetadataOperand(
                         self.llmod,
                         c"wasm.custom_sections".as_ptr(),
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index ba4fd75fb94..3be8cd5f6ac 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1,10 +1,11 @@
 use std::borrow::Borrow;
 use std::cell::{Cell, RefCell};
 use std::ffi::{CStr, c_char, c_uint};
+use std::marker::PhantomData;
 use std::ops::Deref;
 use std::str;
 
-use rustc_abi::{HasDataLayout, TargetDataLayout, VariantIdx};
+use rustc_abi::{HasDataLayout, Size, TargetDataLayout, VariantIdx};
 use rustc_codegen_ssa::back::versioned_llvm_target;
 use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
 use rustc_codegen_ssa::common::TypeKind;
@@ -27,34 +28,36 @@ use rustc_session::config::{
 };
 use rustc_span::source_map::Spanned;
 use rustc_span::{DUMMY_SP, Span};
+use rustc_symbol_mangling::mangle_internal_symbol;
 use rustc_target::spec::{HasTargetSpec, RelocModel, SmallDataThresholdSupport, Target, TlsModel};
 use smallvec::SmallVec;
 
 use crate::back::write::to_llvm_code_model;
 use crate::callee::get_fn;
-use crate::common::{self, AsCCharPtr};
+use crate::common::AsCCharPtr;
 use crate::debuginfo::metadata::apply_vcall_visibility_metadata;
-use crate::llvm::{Metadata, MetadataType};
+use crate::llvm::Metadata;
 use crate::type_::Type;
 use crate::value::Value;
-use crate::{attributes, coverageinfo, debuginfo, llvm, llvm_util};
+use crate::{attributes, common, coverageinfo, debuginfo, llvm, llvm_util};
 
 /// `TyCtxt` (and related cache data structures) can't be moved between threads.
 /// However, there are various cx related functions which we want to be available to the builder and
 /// other compiler pieces. Here we define a small subset which has enough information and can be
 /// moved around more freely.
-pub(crate) struct SimpleCx<'ll> {
+pub(crate) struct SCx<'ll> {
     pub llmod: &'ll llvm::Module,
     pub llcx: &'ll llvm::Context,
+    pub isize_ty: &'ll Type,
 }
 
-impl<'ll> Borrow<SimpleCx<'ll>> for CodegenCx<'ll, '_> {
-    fn borrow(&self) -> &SimpleCx<'ll> {
+impl<'ll> Borrow<SCx<'ll>> for FullCx<'ll, '_> {
+    fn borrow(&self) -> &SCx<'ll> {
         &self.scx
     }
 }
 
-impl<'ll, 'tcx> Deref for CodegenCx<'ll, 'tcx> {
+impl<'ll, 'tcx> Deref for FullCx<'ll, 'tcx> {
     type Target = SimpleCx<'ll>;
 
     #[inline]
@@ -63,10 +66,25 @@ impl<'ll, 'tcx> Deref for CodegenCx<'ll, 'tcx> {
     }
 }
 
+pub(crate) struct GenericCx<'ll, T: Borrow<SCx<'ll>>>(T, PhantomData<SCx<'ll>>);
+
+impl<'ll, T: Borrow<SCx<'ll>>> Deref for GenericCx<'ll, T> {
+    type Target = T;
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+pub(crate) type SimpleCx<'ll> = GenericCx<'ll, SCx<'ll>>;
+
 /// There is one `CodegenCx` per codegen unit. Each one has its own LLVM
 /// `llvm::Context` so that several codegen units may be processed in parallel.
 /// All other LLVM data structures in the `CodegenCx` are tied to that `llvm::Context`.
-pub(crate) struct CodegenCx<'ll, 'tcx> {
+pub(crate) type CodegenCx<'ll, 'tcx> = GenericCx<'ll, FullCx<'ll, 'tcx>>;
+
+pub(crate) struct FullCx<'ll, 'tcx> {
     pub tcx: TyCtxt<'tcx>,
     pub scx: SimpleCx<'ll>,
     pub use_dll_storage_attrs: bool,
@@ -104,8 +122,6 @@ pub(crate) struct CodegenCx<'ll, 'tcx> {
     /// Mapping of scalar types to llvm types.
     pub scalar_lltypes: RefCell<FxHashMap<Ty<'tcx>, &'ll Type>>,
 
-    pub isize_ty: &'ll Type,
-
     /// Extra per-CGU codegen state needed when coverage instrumentation is enabled.
     pub coverage_cx: Option<coverageinfo::CguCoverageContext<'ll, 'tcx>>,
     pub dbg_cx: Option<debuginfo::CodegenUnitDebugContext<'ll, 'tcx>>,
@@ -194,12 +210,18 @@ pub(crate) unsafe fn create_module<'ll>(
             target_data_layout = target_data_layout.replace("-i128:128", "");
         }
     }
+    if llvm_version < (21, 0, 0) {
+        if sess.target.arch == "nvptx64" {
+            // LLVM 21 updated the default layout on nvptx: https://github.com/llvm/llvm-project/pull/124961
+            target_data_layout = target_data_layout.replace("e-p6:32:32-i64", "e-i64");
+        }
+    }
 
     // Ensure the data-layout values hardcoded remain the defaults.
     {
         let tm = crate::back::write::create_informational_target_machine(tcx.sess, false);
         unsafe {
-            llvm::LLVMRustSetDataLayoutFromTargetMachine(llmod, &tm);
+            llvm::LLVMRustSetDataLayoutFromTargetMachine(llmod, tm.raw());
         }
 
         let llvm_data_layout = unsafe { llvm::LLVMGetDataLayoutStr(llmod) };
@@ -573,33 +595,33 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
             None
         };
 
-        let isize_ty = Type::ix_llcx(llcx, tcx.data_layout.pointer_size.bits());
-
-        CodegenCx {
-            tcx,
-            scx: SimpleCx { llcx, llmod },
-            use_dll_storage_attrs,
-            tls_model,
-            codegen_unit,
-            instances: Default::default(),
-            vtables: Default::default(),
-            const_str_cache: Default::default(),
-            const_globals: Default::default(),
-            statics_to_rauw: RefCell::new(Vec::new()),
-            used_statics: RefCell::new(Vec::new()),
-            compiler_used_statics: RefCell::new(Vec::new()),
-            type_lowering: Default::default(),
-            scalar_lltypes: Default::default(),
-            isize_ty,
-            coverage_cx,
-            dbg_cx,
-            eh_personality: Cell::new(None),
-            eh_catch_typeinfo: Cell::new(None),
-            rust_try_fn: Cell::new(None),
-            intrinsics: Default::default(),
-            local_gen_sym_counter: Cell::new(0),
-            renamed_statics: Default::default(),
-        }
+        GenericCx(
+            FullCx {
+                tcx,
+                scx: SimpleCx::new(llmod, llcx, tcx.data_layout.pointer_size),
+                use_dll_storage_attrs,
+                tls_model,
+                codegen_unit,
+                instances: Default::default(),
+                vtables: Default::default(),
+                const_str_cache: Default::default(),
+                const_globals: Default::default(),
+                statics_to_rauw: RefCell::new(Vec::new()),
+                used_statics: RefCell::new(Vec::new()),
+                compiler_used_statics: RefCell::new(Vec::new()),
+                type_lowering: Default::default(),
+                scalar_lltypes: Default::default(),
+                coverage_cx,
+                dbg_cx,
+                eh_personality: Cell::new(None),
+                eh_catch_typeinfo: Cell::new(None),
+                rust_try_fn: Cell::new(None),
+                intrinsics: Default::default(),
+                local_gen_sym_counter: Cell::new(0),
+                renamed_statics: Default::default(),
+            },
+            PhantomData,
+        )
     }
 
     pub(crate) fn statics_to_rauw(&self) -> &RefCell<Vec<(&'ll Value, &'ll Value)>> {
@@ -616,32 +638,56 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     pub(crate) fn create_used_variable_impl(&self, name: &'static CStr, values: &[&'ll Value]) {
         let array = self.const_array(self.type_ptr(), values);
 
-        unsafe {
-            let g = llvm::LLVMAddGlobal(self.llmod, self.val_ty(array), name.as_ptr());
-            llvm::LLVMSetInitializer(g, array);
-            llvm::set_linkage(g, llvm::Linkage::AppendingLinkage);
-            llvm::set_section(g, c"llvm.metadata");
-        }
+        let g = llvm::add_global(self.llmod, self.val_ty(array), name);
+        llvm::set_initializer(g, array);
+        llvm::set_linkage(g, llvm::Linkage::AppendingLinkage);
+        llvm::set_section(g, c"llvm.metadata");
     }
 }
 impl<'ll> SimpleCx<'ll> {
+    pub(crate) fn get_return_type(&self, ty: &'ll Type) -> &'ll Type {
+        assert_eq!(self.type_kind(ty), TypeKind::Function);
+        unsafe { llvm::LLVMGetReturnType(ty) }
+    }
+    pub(crate) fn get_type_of_global(&self, val: &'ll Value) -> &'ll Type {
+        unsafe { llvm::LLVMGlobalGetValueType(val) }
+    }
     pub(crate) fn val_ty(&self, v: &'ll Value) -> &'ll Type {
         common::val_ty(v)
     }
+}
+impl<'ll> SimpleCx<'ll> {
+    pub(crate) fn new(
+        llmod: &'ll llvm::Module,
+        llcx: &'ll llvm::Context,
+        pointer_size: Size,
+    ) -> Self {
+        let isize_ty = llvm::Type::ix_llcx(llcx, pointer_size.bits());
+        Self(SCx { llmod, llcx, isize_ty }, PhantomData)
+    }
+}
 
+impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
     pub(crate) fn get_metadata_value(&self, metadata: &'ll Metadata) -> &'ll Value {
-        unsafe { llvm::LLVMMetadataAsValue(self.llcx, metadata) }
+        llvm::LLVMMetadataAsValue(self.llcx(), metadata)
+    }
+
+    // FIXME(autodiff): We should split `ConstCodegenMethods` to pull the reusable parts
+    // onto a trait that is also implemented for GenericCx.
+    pub(crate) fn get_const_i64(&self, n: u64) -> &'ll Value {
+        let ty = unsafe { llvm::LLVMInt64TypeInContext(self.llcx()) };
+        unsafe { llvm::LLVMConstInt(ty, n, llvm::False) }
     }
 
     pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> {
         let name = SmallCStr::new(name);
-        unsafe { llvm::LLVMGetNamedFunction(self.llmod, name.as_ptr()) }
+        unsafe { llvm::LLVMGetNamedFunction((**self).borrow().llmod, name.as_ptr()) }
     }
 
-    pub(crate) fn get_md_kind_id(&self, name: &str) -> u32 {
+    pub(crate) fn get_md_kind_id(&self, name: &str) -> llvm::MetadataKindId {
         unsafe {
             llvm::LLVMGetMDKindIDInContext(
-                self.llcx,
+                self.llcx(),
                 name.as_ptr() as *const c_char,
                 name.len() as c_uint,
             )
@@ -650,13 +696,9 @@ impl<'ll> SimpleCx<'ll> {
 
     pub(crate) fn create_metadata(&self, name: String) -> Option<&'ll Metadata> {
         Some(unsafe {
-            llvm::LLVMMDStringInContext2(self.llcx, name.as_ptr() as *const c_char, name.len())
+            llvm::LLVMMDStringInContext2(self.llcx(), name.as_ptr() as *const c_char, name.len())
         })
     }
-
-    pub(crate) fn type_kind(&self, ty: &'ll Type) -> TypeKind {
-        unsafe { llvm::LLVMRustGetTypeKind(ty).to_generic() }
-    }
 }
 
 impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
@@ -1104,6 +1146,18 @@ impl<'ll> CodegenCx<'ll, '_> {
         ifn!("llvm.usub.sat.i64", fn(t_i64, t_i64) -> t_i64);
         ifn!("llvm.usub.sat.i128", fn(t_i128, t_i128) -> t_i128);
 
+        ifn!("llvm.scmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
+        ifn!("llvm.scmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
+        ifn!("llvm.scmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
+        ifn!("llvm.scmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
+        ifn!("llvm.scmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
+
+        ifn!("llvm.ucmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
+        ifn!("llvm.ucmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
+        ifn!("llvm.ucmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
+        ifn!("llvm.ucmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
+        ifn!("llvm.ucmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
+
         ifn!("llvm.lifetime.start.p0i8", fn(t_i64, ptr) -> void);
         ifn!("llvm.lifetime.end.p0i8", fn(t_i64, ptr) -> void);
 
@@ -1177,7 +1231,7 @@ impl<'ll> CodegenCx<'ll, '_> {
             Some(def_id) => self.get_static(def_id),
             _ => {
                 let ty = self.type_struct(&[self.type_ptr(), self.type_ptr()], false);
-                self.declare_global("rust_eh_catch_typeinfo", ty)
+                self.declare_global(&mangle_internal_symbol(self.tcx, "rust_eh_catch_typeinfo"), ty)
             }
         };
         self.eh_catch_typeinfo.set(Some(eh_catch_typeinfo));
@@ -1199,27 +1253,18 @@ impl CodegenCx<'_, '_> {
         name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
         name
     }
-
-    /// A wrapper for [`llvm::LLVMSetMetadata`], but it takes `Metadata` as a parameter instead of `Value`.
-    pub(crate) fn set_metadata<'a>(&self, val: &'a Value, kind_id: MetadataType, md: &'a Metadata) {
-        unsafe {
-            let node = llvm::LLVMMetadataAsValue(&self.llcx, md);
-            llvm::LLVMSetMetadata(val, kind_id as c_uint, node);
-        }
-    }
 }
 
-// This is a duplication of the set_metadata function above. However, so far it's the only one
-// shared between both contexts, so it doesn't seem worth it to make the Cx generic like we did it
-// for the Builder.
-impl SimpleCx<'_> {
-    #[allow(unused)]
+impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
     /// A wrapper for [`llvm::LLVMSetMetadata`], but it takes `Metadata` as a parameter instead of `Value`.
-    pub(crate) fn set_metadata<'a>(&self, val: &'a Value, kind_id: MetadataType, md: &'a Metadata) {
-        unsafe {
-            let node = llvm::LLVMMetadataAsValue(&self.llcx, md);
-            llvm::LLVMSetMetadata(val, kind_id as c_uint, node);
-        }
+    pub(crate) fn set_metadata<'a>(
+        &self,
+        val: &'a Value,
+        kind_id: impl Into<llvm::MetadataKindId>,
+        md: &'ll Metadata,
+    ) {
+        let node = self.get_metadata_value(md);
+        llvm::LLVMSetMetadata(val, kind_id.into(), node);
     }
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
index b617f4d37f5..f6000e72840 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
@@ -146,6 +146,7 @@ pub(crate) struct CoverageSpan {
 #[derive(Clone, Debug, Default)]
 pub(crate) struct Regions {
     pub(crate) code_regions: Vec<CodeRegion>,
+    pub(crate) expansion_regions: Vec<ExpansionRegion>,
     pub(crate) branch_regions: Vec<BranchRegion>,
     pub(crate) mcdc_branch_regions: Vec<MCDCBranchRegion>,
     pub(crate) mcdc_decision_regions: Vec<MCDCDecisionRegion>,
@@ -154,10 +155,16 @@ pub(crate) struct Regions {
 impl Regions {
     /// Returns true if none of this structure's tables contain any regions.
     pub(crate) fn has_no_regions(&self) -> bool {
-        let Self { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-            self;
+        let Self {
+            code_regions,
+            expansion_regions,
+            branch_regions,
+            mcdc_branch_regions,
+            mcdc_decision_regions,
+        } = self;
 
         code_regions.is_empty()
+            && expansion_regions.is_empty()
             && branch_regions.is_empty()
             && mcdc_branch_regions.is_empty()
             && mcdc_decision_regions.is_empty()
@@ -172,6 +179,14 @@ pub(crate) struct CodeRegion {
     pub(crate) counter: Counter,
 }
 
+/// Must match the layout of `LLVMRustCoverageExpansionRegion`.
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub(crate) struct ExpansionRegion {
+    pub(crate) cov_span: CoverageSpan,
+    pub(crate) expanded_file_id: u32,
+}
+
 /// Must match the layout of `LLVMRustCoverageBranchRegion`.
 #[derive(Clone, Debug)]
 #[repr(C)]
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
index 2cd7fa3225a..907d6d41a1f 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
@@ -63,8 +63,18 @@ pub(crate) fn write_function_mappings_to_buffer(
     expressions: &[ffi::CounterExpression],
     regions: &ffi::Regions,
 ) -> Vec<u8> {
-    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-        regions;
+    let ffi::Regions {
+        code_regions,
+        expansion_regions,
+        branch_regions,
+        mcdc_branch_regions,
+        mcdc_decision_regions,
+    } = regions;
+
+    // SAFETY:
+    // - All types are FFI-compatible and have matching representations in Rust/C++.
+    // - For pointer/length pairs, the pointer and length come from the same vector or slice.
+    // - C++ code does not retain any pointers after the call returns.
     llvm::build_byte_buffer(|buffer| unsafe {
         llvm::LLVMRustCoverageWriteFunctionMappingsToBuffer(
             virtual_file_mapping.as_ptr(),
@@ -73,6 +83,8 @@ pub(crate) fn write_function_mappings_to_buffer(
             expressions.len(),
             code_regions.as_ptr(),
             code_regions.len(),
+            expansion_regions.as_ptr(),
+            expansion_regions.len(),
             branch_regions.as_ptr(),
             branch_regions.len(),
             mcdc_branch_regions.as_ptr(),
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 460a4664615..048e1988c32 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -8,10 +8,11 @@ use std::ffi::CString;
 
 use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
-    BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
+    BaseTypeCodegenMethods as _, ConstCodegenMethods, StaticCodegenMethods,
 };
 use rustc_middle::mir::coverage::{
-    CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping, MappingKind, Op,
+    BasicCoverageBlock, CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping,
+    MappingKind, Op,
 };
 use rustc_middle::ty::{Instance, TyCtxt};
 use rustc_span::Span;
@@ -53,7 +54,7 @@ pub(crate) fn prepare_covfun_record<'tcx>(
     let fn_cov_info = tcx.instance_mir(instance.def).function_coverage_info.as_deref()?;
     let ids_info = tcx.coverage_ids_info(instance.def)?;
 
-    let expressions = prepare_expressions(fn_cov_info, ids_info, is_used);
+    let expressions = prepare_expressions(ids_info);
 
     let mut covfun = CovfunRecord {
         mangled_function_name: tcx.symbol_name(instance).name,
@@ -75,26 +76,14 @@ pub(crate) fn prepare_covfun_record<'tcx>(
 }
 
 /// Convert the function's coverage-counter expressions into a form suitable for FFI.
-fn prepare_expressions(
-    fn_cov_info: &FunctionCoverageInfo,
-    ids_info: &CoverageIdsInfo,
-    is_used: bool,
-) -> Vec<ffi::CounterExpression> {
-    // If any counters or expressions were removed by MIR opts, replace their
-    // terms with zero.
-    let counter_for_term = |term| {
-        if !is_used || ids_info.is_zero_term(term) {
-            ffi::Counter::ZERO
-        } else {
-            ffi::Counter::from_term(term)
-        }
-    };
+fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression> {
+    let counter_for_term = ffi::Counter::from_term;
 
     // We know that LLVM will optimize out any unused expressions before
     // producing the final coverage map, so there's no need to do the same
     // thing on the Rust side unless we're confident we can do much better.
     // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)
-    fn_cov_info
+    ids_info
         .expressions
         .iter()
         .map(move |&Expression { lhs, op, rhs }| ffi::CounterExpression {
@@ -116,9 +105,14 @@ fn fill_region_tables<'tcx>(
     ids_info: &'tcx CoverageIdsInfo,
     covfun: &mut CovfunRecord<'tcx>,
 ) {
-    // Currently a function's mappings must all be in the same file as its body span.
+    // Currently a function's mappings must all be in the same file, so use the
+    // first mapping's span to determine the file.
     let source_map = tcx.sess.source_map();
-    let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
+    let Some(first_span) = (try { fn_cov_info.mappings.first()?.span }) else {
+        debug_assert!(false, "function has no mappings: {:?}", covfun.mangled_function_name);
+        return;
+    };
+    let source_file = source_map.lookup_source_file(first_span.lo());
 
     // Look up the global file ID for that file.
     let global_file_id = global_file_table.global_file_id_for_file(&source_file);
@@ -126,51 +120,55 @@ fn fill_region_tables<'tcx>(
     // Associate that global file ID with a local file ID for this function.
     let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
 
-    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-        &mut covfun.regions;
-
-    let make_cov_span = |span: Span| {
-        spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
-    };
+    // In rare cases, _all_ of a function's spans are discarded, and coverage
+    // codegen needs to handle that gracefully to avoid #133606.
+    // It's hard for tests to trigger this organically, so instead we set
+    // `-Zcoverage-options=discard-all-spans-in-codegen` to force it to occur.
     let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen();
+    let make_coords = |span: Span| {
+        if discard_all { None } else { spans::make_coords(source_map, &source_file, span) }
+    };
+
+    let ffi::Regions {
+        code_regions,
+        expansion_regions: _, // FIXME(Zalathar): Fill out support for expansion regions
+        branch_regions,
+        mcdc_branch_regions,
+        mcdc_decision_regions,
+    } = &mut covfun.regions;
 
     // For each counter/region pair in this function+file, convert it to a
     // form suitable for FFI.
-    let is_zero_term = |term| !covfun.is_used || ids_info.is_zero_term(term);
     for &Mapping { ref kind, span } in &fn_cov_info.mappings {
-        // If the mapping refers to counters/expressions that were removed by
-        // MIR opts, replace those occurrences with zero.
-        let kind = kind.map_terms(|term| if is_zero_term(term) { CovTerm::Zero } else { term });
-
-        // Convert the `Span` into coordinates that we can pass to LLVM, or
-        // discard the span if conversion fails. In rare, cases _all_ of a
-        // function's spans are discarded, and the rest of coverage codegen
-        // needs to handle that gracefully to avoid a repeat of #133606.
-        // We don't have a good test case for triggering that organically, so
-        // instead we set `-Zcoverage-options=discard-all-spans-in-codegen`
-        // to force it to occur.
-        let Some(cov_span) = make_cov_span(span) else { continue };
-        if discard_all {
-            continue;
-        }
+        // If this function is unused, replace all counters with zero.
+        let counter_for_bcb = |bcb: BasicCoverageBlock| -> ffi::Counter {
+            let term = if covfun.is_used {
+                ids_info.term_for_bcb[bcb].expect("every BCB in a mapping was given a term")
+            } else {
+                CovTerm::Zero
+            };
+            ffi::Counter::from_term(term)
+        };
+
+        let Some(coords) = make_coords(span) else { continue };
+        let cov_span = coords.make_coverage_span(local_file_id);
 
-        match kind {
-            MappingKind::Code(term) => {
-                code_regions
-                    .push(ffi::CodeRegion { cov_span, counter: ffi::Counter::from_term(term) });
+        match *kind {
+            MappingKind::Code { bcb } => {
+                code_regions.push(ffi::CodeRegion { cov_span, counter: counter_for_bcb(bcb) });
             }
-            MappingKind::Branch { true_term, false_term } => {
+            MappingKind::Branch { true_bcb, false_bcb } => {
                 branch_regions.push(ffi::BranchRegion {
                     cov_span,
-                    true_counter: ffi::Counter::from_term(true_term),
-                    false_counter: ffi::Counter::from_term(false_term),
+                    true_counter: counter_for_bcb(true_bcb),
+                    false_counter: counter_for_bcb(false_bcb),
                 });
             }
-            MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => {
+            MappingKind::MCDCBranch { true_bcb, false_bcb, mcdc_params } => {
                 mcdc_branch_regions.push(ffi::MCDCBranchRegion {
                     cov_span,
-                    true_counter: ffi::Counter::from_term(true_term),
-                    false_counter: ffi::Counter::from_term(false_term),
+                    true_counter: counter_for_bcb(true_bcb),
+                    false_counter: counter_for_bcb(false_bcb),
                     mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params),
                 });
             }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
index 6d1d91340c2..39a59560c9d 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
@@ -1,4 +1,3 @@
-use rustc_middle::mir::coverage::FunctionCoverageInfo;
 use rustc_span::source_map::SourceMap;
 use rustc_span::{BytePos, Pos, SourceFile, Span};
 use tracing::debug;
@@ -6,24 +5,41 @@ use tracing::debug;
 use crate::coverageinfo::ffi;
 use crate::coverageinfo::mapgen::LocalFileId;
 
+/// Line and byte-column coordinates of a source code span within some file.
+/// The file itself must be tracked separately.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Coords {
+    /// 1-based starting line of the source code span.
+    pub(crate) start_line: u32,
+    /// 1-based starting column (in bytes) of the source code span.
+    pub(crate) start_col: u32,
+    /// 1-based ending line of the source code span.
+    pub(crate) end_line: u32,
+    /// 1-based ending column (in bytes) of the source code span. High bit must be unset.
+    pub(crate) end_col: u32,
+}
+
+impl Coords {
+    /// Attaches a local file ID to these coordinates to produce an `ffi::CoverageSpan`.
+    pub(crate) fn make_coverage_span(&self, local_file_id: LocalFileId) -> ffi::CoverageSpan {
+        let &Self { start_line, start_col, end_line, end_col } = self;
+        let file_id = local_file_id.as_u32();
+        ffi::CoverageSpan { file_id, start_line, start_col, end_line, end_col }
+    }
+}
+
 /// Converts the span into its start line and column, and end line and column.
 ///
 /// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
 /// the compiler, these column numbers are denoted in **bytes**, because that's what
 /// LLVM's `llvm-cov` tool expects to see in coverage maps.
 ///
-/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
+/// Returns `None` if the conversion failed for some reason. This should be uncommon,
 /// but it's hard to rule out entirely (especially in the presence of complex macros
 /// or other expansions), and if it does happen then skipping a span or function is
 /// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
-pub(crate) fn make_coverage_span(
-    file_id: LocalFileId,
-    source_map: &SourceMap,
-    fn_cov_info: &FunctionCoverageInfo,
-    file: &SourceFile,
-    span: Span,
-) -> Option<ffi::CoverageSpan> {
-    let span = ensure_non_empty_span(source_map, fn_cov_info, span)?;
+pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) -> Option<Coords> {
+    let span = ensure_non_empty_span(source_map, span)?;
 
     let lo = span.lo();
     let hi = span.hi();
@@ -46,8 +62,7 @@ pub(crate) fn make_coverage_span(
     start_line = source_map.doctest_offset_line(&file.name, start_line);
     end_line = source_map.doctest_offset_line(&file.name, end_line);
 
-    check_coverage_span(ffi::CoverageSpan {
-        file_id: file_id.as_u32(),
+    check_coords(Coords {
         start_line: start_line as u32,
         start_col: start_col as u32,
         end_line: end_line as u32,
@@ -55,36 +70,22 @@ pub(crate) fn make_coverage_span(
     })
 }
 
-fn ensure_non_empty_span(
-    source_map: &SourceMap,
-    fn_cov_info: &FunctionCoverageInfo,
-    span: Span,
-) -> Option<Span> {
+fn ensure_non_empty_span(source_map: &SourceMap, span: Span) -> Option<Span> {
     if !span.is_empty() {
         return Some(span);
     }
 
-    let lo = span.lo();
-    let hi = span.hi();
-
-    // The span is empty, so try to expand it to cover an adjacent '{' or '}',
-    // but only within the bounds of the body span.
-    let try_next = hi < fn_cov_info.body_span.hi();
-    let try_prev = fn_cov_info.body_span.lo() < lo;
-    if !(try_next || try_prev) {
-        return None;
-    }
-
+    // The span is empty, so try to enlarge it to cover an adjacent '{' or '}'.
     source_map
         .span_to_source(span, |src, start, end| try {
             // Adjusting span endpoints by `BytePos(1)` is normally a bug,
             // but in this case we have specifically checked that the character
             // we're skipping over is one of two specific ASCII characters, so
             // adjusting by exactly 1 byte is correct.
-            if try_next && src.as_bytes()[end] == b'{' {
-                Some(span.with_hi(hi + BytePos(1)))
-            } else if try_prev && src.as_bytes()[start - 1] == b'}' {
-                Some(span.with_lo(lo - BytePos(1)))
+            if src.as_bytes().get(end).copied() == Some(b'{') {
+                Some(span.with_hi(span.hi() + BytePos(1)))
+            } else if start > 0 && src.as_bytes()[start - 1] == b'}' {
+                Some(span.with_lo(span.lo() - BytePos(1)))
             } else {
                 None
             }
@@ -96,8 +97,8 @@ fn ensure_non_empty_span(
 /// it will immediately exit with a fatal error. To prevent that from happening,
 /// discard regions that are improperly ordered, or might be interpreted in a
 /// way that makes them improperly ordered.
-fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> {
-    let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span;
+fn check_coords(coords: Coords) -> Option<Coords> {
+    let Coords { start_line, start_col, end_line, end_col } = coords;
 
     // Line/column coordinates are supposed to be 1-based. If we ever emit
     // coordinates of 0, `llvm-cov` might misinterpret them.
@@ -110,17 +111,17 @@ fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan>
     let is_ordered = (start_line, start_col) <= (end_line, end_col);
 
     if all_nonzero && end_col_has_high_bit_unset && is_ordered {
-        Some(cov_span)
+        Some(coords)
     } else {
         debug!(
-            ?cov_span,
+            ?coords,
             ?all_nonzero,
             ?end_col_has_high_bit_unset,
             ?is_ordered,
             "Skipping source region that would be misinterpreted or rejected by LLVM"
         );
         // If this happens in a debug build, ICE to make it easier to notice.
-        debug_assert!(false, "Improper source region: {cov_span:?}");
+        debug_assert!(false, "Improper source region: {coords:?}");
         None
     }
 }
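For orientation, a minimal standalone sketch of the split introduced above: file-independent coordinates are computed first, and a local file ID is attached only when the final coverage span is emitted. The `Coords` and `CoverageSpan` structs below are simplified stand-ins, not the compiler's real `ffi` types.

#[derive(Clone, Copy, Debug)]
struct Coords { start_line: u32, start_col: u32, end_line: u32, end_col: u32 }

#[derive(Clone, Copy, Debug)]
struct CoverageSpan { file_id: u32, start_line: u32, start_col: u32, end_line: u32, end_col: u32 }

impl Coords {
    // Attach a per-file ID to otherwise file-independent coordinates.
    fn make_coverage_span(&self, file_id: u32) -> CoverageSpan {
        let &Coords { start_line, start_col, end_line, end_col } = self;
        CoverageSpan { file_id, start_line, start_col, end_line, end_col }
    }
}

fn main() {
    let coords = Coords { start_line: 3, start_col: 1, end_line: 3, end_col: 10 };
    println!("{:?}", coords.make_coverage_span(0));
}
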
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
index 021108cd51c..ea7f581a3cb 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
@@ -160,21 +160,12 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
             CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!(
                 "marker statement {kind:?} should have been removed by CleanupPostBorrowck"
             ),
-            CoverageKind::CounterIncrement { id } => {
-                // The number of counters passed to `llvm.instrprof.increment` might
-                // be smaller than the number originally inserted by the instrumentor,
-                // if some high-numbered counters were removed by MIR optimizations.
-                // If so, LLVM's profiler runtime will use fewer physical counters.
-                let num_counters = ids_info.num_counters_after_mir_opts();
-                assert!(
-                    num_counters as usize <= function_coverage_info.num_counters,
-                    "num_counters disagreement: query says {num_counters} but function info only has {}",
-                    function_coverage_info.num_counters
-                );
-
+            CoverageKind::VirtualCounter { bcb }
+                if let Some(&id) = ids_info.phys_counter_for_node.get(&bcb) =>
+            {
                 let fn_name = bx.get_pgo_func_name_var(instance);
                 let hash = bx.const_u64(function_coverage_info.function_source_hash);
-                let num_counters = bx.const_u32(num_counters);
+                let num_counters = bx.const_u32(ids_info.num_counters);
                 let index = bx.const_u32(id.as_u32());
                 debug!(
                     "codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, index={:?})",
@@ -182,10 +173,8 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
                 );
                 bx.instrprof_increment(fn_name, hash, num_counters, index);
             }
-            CoverageKind::ExpressionUsed { id: _ } => {
-                // Expression-used statements are markers that are handled by
-                // `coverage_ids_info`, so there's nothing to codegen here.
-            }
+            // If a BCB doesn't have an associated physical counter, there's nothing to codegen.
+            CoverageKind::VirtualCounter { .. } => {}
             CoverageKind::CondBitmapUpdate { index, decision_depth } => {
                 let cond_bitmap = coverage_cx
                     .try_get_mcdc_condition_bitmap(&instance, decision_depth)
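A hedged, self-contained sketch of the counter lookup above, with plain `u32` keys and a `HashMap` standing in for the real BCB indices and query result: only nodes that were assigned a physical counter produce an `instrprof.increment`; purely virtual counters emit nothing.

use std::collections::HashMap;

fn codegen_counter(bcb: u32, phys_counter_for_node: &HashMap<u32, u32>) -> Option<String> {
    // No physical counter assigned to this node: nothing to codegen.
    let id = *phys_counter_for_node.get(&bcb)?;
    Some(format!("llvm.instrprof.increment(index = {id})"))
}

fn main() {
    let counters = HashMap::from([(0u32, 0u32), (2, 1)]);
    assert!(codegen_counter(0, &counters).is_some());
    assert!(codegen_counter(1, &counters).is_none()); // purely virtual counter
}
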
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
index 2c9f1cda13a..4ffe551df09 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs
@@ -73,7 +73,7 @@ pub(crate) fn get_or_insert_gdb_debug_scripts_section_global<'ll>(
                 .define_global(section_var_name, llvm_type)
                 .unwrap_or_else(|| bug!("symbol `{}` is already defined", section_var_name));
             llvm::set_section(section_var, c".debug_gdb_scripts");
-            llvm::LLVMSetInitializer(section_var, cx.const_bytes(section_contents));
+            llvm::set_initializer(section_var, cx.const_bytes(section_contents));
             llvm::LLVMSetGlobalConstant(section_var, llvm::True);
             llvm::LLVMSetUnnamedAddress(section_var, llvm::UnnamedAddr::Global);
             llvm::set_linkage(section_var, llvm::Linkage::LinkOnceODRLinkage);
@@ -87,7 +87,7 @@ pub(crate) fn get_or_insert_gdb_debug_scripts_section_global<'ll>(
 
 pub(crate) fn needs_gdb_debug_scripts_section(cx: &CodegenCx<'_, '_>) -> bool {
     let omit_gdb_pretty_printer_section =
-        attr::contains_name(cx.tcx.hir().krate_attrs(), sym::omit_gdb_pretty_printer_section);
+        attr::contains_name(cx.tcx.hir_krate_attrs(), sym::omit_gdb_pretty_printer_section);
 
     // To ensure the section `__rustc_debug_gdb_scripts_section__` will not create
     // ODR violations at link time, this section will not be emitted for rlibs since
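The `LLVMSetInitializer` to `llvm::set_initializer` change is part of a broader move toward thin safe wrappers around raw calls. A minimal sketch of that pattern, using a local stand-in rather than the real LLVM binding:

unsafe fn raw_set_initializer(slot: *mut u64, value: u64) {
    unsafe { *slot = value }
}

// Safe wrapper: the safety argument lives in one place instead of at every call site.
fn set_initializer(slot: &mut u64, value: u64) {
    // SAFETY: a mutable reference is always a valid, aligned, exclusive pointer.
    unsafe { raw_set_initializer(slot as *mut u64, value) }
}

fn main() {
    let mut global = 0u64;
    set_initializer(&mut global, 42);
    assert_eq!(global, 42);
}
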
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index f497ba95661..2eaaf127e41 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use std::fmt::{self, Write};
 use std::hash::{Hash, Hasher};
 use std::path::{Path, PathBuf};
+use std::sync::Arc;
 use std::{iter, ptr};
 
 use libc::{c_char, c_longlong, c_uint};
@@ -11,7 +12,9 @@ use rustc_codegen_ssa::traits::*;
 use rustc_hir::def::{CtorKind, DefKind};
 use rustc_hir::def_id::{DefId, LOCAL_CRATE};
 use rustc_middle::bug;
-use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf, TyAndLayout};
+use rustc_middle::ty::layout::{
+    HasTypingEnv, LayoutOf, TyAndLayout, WIDE_PTR_ADDR, WIDE_PTR_EXTRA,
+};
 use rustc_middle::ty::{
     self, AdtKind, CoroutineArgsExt, ExistentialTraitRef, Instance, Ty, TyCtxt, Visibility,
 };
@@ -34,12 +37,12 @@ use crate::common::{AsCCharPtr, CodegenCx};
 use crate::debuginfo::dwarf_const;
 use crate::debuginfo::metadata::type_map::build_type_with_children;
 use crate::debuginfo::utils::{WidePtrKind, wide_pointer_kind};
+use crate::llvm;
 use crate::llvm::debuginfo::{
-    DIDescriptor, DIFile, DIFlags, DILexicalBlock, DIScope, DIType, DebugEmissionKind,
-    DebugNameTableKind,
+    DIBasicType, DIBuilder, DICompositeType, DIDescriptor, DIFile, DIFlags, DILexicalBlock,
+    DIScope, DIType, DebugEmissionKind, DebugNameTableKind,
 };
 use crate::value::Value;
-use crate::{abi, llvm};
 
 impl PartialEq for llvm::Metadata {
     fn eq(&self, other: &Self) -> bool {
@@ -66,7 +69,8 @@ pub(super) const UNKNOWN_COLUMN_NUMBER: c_uint = 0;
 
 const NO_SCOPE_METADATA: Option<&DIScope> = None;
 /// A function that returns an empty list of generic parameter debuginfo nodes.
-const NO_GENERICS: for<'ll> fn(&CodegenCx<'ll, '_>) -> SmallVec<&'ll DIType> = |_| SmallVec::new();
+const NO_GENERICS: for<'ll> fn(&CodegenCx<'ll, '_>) -> SmallVec<Option<&'ll DIType>> =
+    |_| SmallVec::new();
 
 // SmallVec is used quite a bit in this module, so create a shorthand.
 // The actual number of elements is not so important.
@@ -211,16 +215,16 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                     };
 
                     let layout = cx.layout_of(layout_type);
-                    let addr_field = layout.field(cx, abi::WIDE_PTR_ADDR);
-                    let extra_field = layout.field(cx, abi::WIDE_PTR_EXTRA);
+                    let addr_field = layout.field(cx, WIDE_PTR_ADDR);
+                    let extra_field = layout.field(cx, WIDE_PTR_EXTRA);
 
                     let (addr_field_name, extra_field_name) = match wide_pointer_kind {
                         WidePtrKind::Dyn => ("pointer", "vtable"),
                         WidePtrKind::Slice => ("data_ptr", "length"),
                     };
 
-                    assert_eq!(abi::WIDE_PTR_ADDR, 0);
-                    assert_eq!(abi::WIDE_PTR_EXTRA, 1);
+                    assert_eq!(WIDE_PTR_ADDR, 0);
+                    assert_eq!(WIDE_PTR_EXTRA, 1);
 
                     // The data pointer type is a regular, thin pointer, regardless of whether this
                     // is a slice or a trait object.
@@ -241,8 +245,8 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                             cx,
                             owner,
                             addr_field_name,
-                            (addr_field.size, addr_field.align.abi),
-                            layout.fields.offset(abi::WIDE_PTR_ADDR),
+                            addr_field,
+                            layout.fields.offset(WIDE_PTR_ADDR),
                             DIFlags::FlagZero,
                             data_ptr_type_di_node,
                             None,
@@ -251,8 +255,8 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                             cx,
                             owner,
                             extra_field_name,
-                            (extra_field.size, extra_field.align.abi),
-                            layout.fields.offset(abi::WIDE_PTR_EXTRA),
+                            extra_field,
+                            layout.fields.offset(WIDE_PTR_EXTRA),
                             DIFlags::FlagZero,
                             type_di_node(cx, extra_field.ty),
                             None,
@@ -309,29 +313,21 @@ fn build_subroutine_type_di_node<'ll, 'tcx>(
 
     debug_context(cx).type_map.unique_id_to_di_node.borrow_mut().remove(&unique_type_id);
 
-    let fn_di_node = unsafe {
-        llvm::LLVMRustDIBuilderCreateSubroutineType(
-            DIB(cx),
-            create_DIArray(DIB(cx), &signature_di_nodes[..]),
-        )
-    };
+    let fn_di_node = create_subroutine_type(cx, create_DIArray(DIB(cx), &signature_di_nodes[..]));
 
     // This is actually a function pointer, so wrap it in pointer DI.
     let name = compute_debuginfo_type_name(cx.tcx, fn_ty, false);
     let (size, align) = match fn_ty.kind() {
-        ty::FnDef(..) => (0, 1),
-        ty::FnPtr(..) => (
-            cx.tcx.data_layout.pointer_size.bits(),
-            cx.tcx.data_layout.pointer_align.abi.bits() as u32,
-        ),
+        ty::FnDef(..) => (Size::ZERO, Align::ONE),
+        ty::FnPtr(..) => (cx.tcx.data_layout.pointer_size, cx.tcx.data_layout.pointer_align.abi),
         _ => unreachable!(),
     };
     let di_node = unsafe {
         llvm::LLVMRustDIBuilderCreatePointerType(
             DIB(cx),
             fn_di_node,
-            size,
-            align,
+            size.bits(),
+            align.bits() as u32,
             0, // Ignore DWARF address space.
             name.as_c_char_ptr(),
             name.len(),
@@ -341,6 +337,13 @@ fn build_subroutine_type_di_node<'ll, 'tcx>(
     DINodeCreationResult::new(di_node, false)
 }
 
+pub(super) fn create_subroutine_type<'ll>(
+    cx: &CodegenCx<'ll, '_>,
+    signature: &'ll DICompositeType,
+) -> &'ll DICompositeType {
+    unsafe { llvm::LLVMRustDIBuilderCreateSubroutineType(DIB(cx), signature) }
+}
+
 /// Create debuginfo for `dyn SomeTrait` types. Currently these are empty structs
 /// with the correct type name (e.g. "dyn SomeTrait<Foo, Item=u32> + Sync").
 fn build_dyn_type_di_node<'ll, 'tcx>(
@@ -488,26 +491,22 @@ pub(crate) fn type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, t: Ty<'tcx>) ->
 // FIXME(mw): Cache this via a regular UniqueTypeId instead of an extra field in the debug context.
 fn recursion_marker_type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> &'ll DIType {
     *debug_context(cx).recursion_marker_type.get_or_init(move || {
-        unsafe {
-            // The choice of type here is pretty arbitrary -
-            // anything reading the debuginfo for a recursive
-            // type is going to see *something* weird - the only
-            // question is what exactly it will see.
-            //
-            // FIXME: the name `<recur_type>` does not fit the naming scheme
-            //        of other types.
-            //
-            // FIXME: it might make sense to use an actual pointer type here
-            //        so that debuggers can show the address.
-            let name = "<recur_type>";
-            llvm::LLVMRustDIBuilderCreateBasicType(
-                DIB(cx),
-                name.as_c_char_ptr(),
-                name.len(),
-                cx.tcx.data_layout.pointer_size.bits(),
-                dwarf_const::DW_ATE_unsigned,
-            )
-        }
+        // The choice of type here is pretty arbitrary -
+        // anything reading the debuginfo for a recursive
+        // type is going to see *something* weird - the only
+        // question is what exactly it will see.
+        //
+        // FIXME: the name `<recur_type>` does not fit the naming scheme
+        //        of other types.
+        //
+        // FIXME: it might make sense to use an actual pointer type here
+        //        so that debuggers can show the address.
+        create_basic_type(
+            cx,
+            "<recur_type>",
+            cx.tcx.data_layout.pointer_size,
+            dwarf_const::DW_ATE_unsigned,
+        )
     })
 }
 
@@ -621,42 +620,38 @@ pub(crate) fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFi
         let source =
             cx.sess().opts.unstable_opts.embed_source.then_some(()).and(source_file.src.as_ref());
 
-        unsafe {
-            llvm::LLVMRustDIBuilderCreateFile(
-                DIB(cx),
-                file_name.as_c_char_ptr(),
-                file_name.len(),
-                directory.as_c_char_ptr(),
-                directory.len(),
-                hash_kind,
-                hash_value.as_c_char_ptr(),
-                hash_value.len(),
-                source.map_or(ptr::null(), |x| x.as_c_char_ptr()),
-                source.map_or(0, |x| x.len()),
-            )
-        }
+        create_file(DIB(cx), &file_name, &directory, &hash_value, hash_kind, source)
     }
 }
 
 fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
-    debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(|| unsafe {
-        let file_name = "<unknown>";
-        let directory = "";
-        let hash_value = "";
+    debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(|| {
+        create_file(DIB(cx), "<unknown>", "", "", llvm::ChecksumKind::None, None)
+    })
+}
 
+fn create_file<'ll>(
+    builder: &DIBuilder<'ll>,
+    file_name: &str,
+    directory: &str,
+    hash_value: &str,
+    hash_kind: llvm::ChecksumKind,
+    source: Option<&Arc<String>>,
+) -> &'ll DIFile {
+    unsafe {
         llvm::LLVMRustDIBuilderCreateFile(
-            DIB(cx),
+            builder,
             file_name.as_c_char_ptr(),
             file_name.len(),
             directory.as_c_char_ptr(),
             directory.len(),
-            llvm::ChecksumKind::None,
+            hash_kind,
             hash_value.as_c_char_ptr(),
             hash_value.len(),
-            ptr::null(),
-            0,
+            source.map_or(ptr::null(), |x| x.as_c_char_ptr()),
+            source.map_or(0, |x| x.len()),
         )
-    })
+    }
 }
 
 trait MsvcBasicName {
@@ -743,7 +738,7 @@ fn build_cpp_f16_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> DINodeCreation
                 cx,
                 float_di_node,
                 "bits",
-                cx.size_and_align_of(bits_ty),
+                cx.layout_of(bits_ty),
                 Size::ZERO,
                 DIFlags::FlagZero,
                 type_di_node(cx, bits_ty),
@@ -789,15 +784,7 @@ fn build_basic_type_di_node<'ll, 'tcx>(
         _ => bug!("debuginfo::build_basic_type_di_node - `t` is invalid type"),
     };
 
-    let ty_di_node = unsafe {
-        llvm::LLVMRustDIBuilderCreateBasicType(
-            DIB(cx),
-            name.as_c_char_ptr(),
-            name.len(),
-            cx.size_of(t).bits(),
-            encoding,
-        )
-    };
+    let ty_di_node = create_basic_type(cx, name, cx.size_of(t), encoding);
 
     if !cpp_like_debuginfo {
         return DINodeCreationResult::new(ty_di_node, false);
@@ -825,6 +812,23 @@ fn build_basic_type_di_node<'ll, 'tcx>(
     DINodeCreationResult::new(typedef_di_node, false)
 }
 
+fn create_basic_type<'ll, 'tcx>(
+    cx: &CodegenCx<'ll, 'tcx>,
+    name: &str,
+    size: Size,
+    encoding: u32,
+) -> &'ll DIBasicType {
+    unsafe {
+        llvm::LLVMRustDIBuilderCreateBasicType(
+            DIB(cx),
+            name.as_c_char_ptr(),
+            name.len(),
+            size.bits(),
+            encoding,
+        )
+    }
+}
+
 fn build_foreign_type_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     t: Ty<'tcx>,
@@ -930,17 +934,13 @@ pub(crate) fn build_compile_unit_di_node<'ll, 'tcx>(
     };
 
     unsafe {
-        let compile_unit_file = llvm::LLVMRustDIBuilderCreateFile(
+        let compile_unit_file = create_file(
             debug_context.builder.as_ref(),
-            name_in_debuginfo.as_c_char_ptr(),
-            name_in_debuginfo.len(),
-            work_dir.as_c_char_ptr(),
-            work_dir.len(),
+            &name_in_debuginfo,
+            &work_dir,
+            "",
             llvm::ChecksumKind::None,
-            ptr::null(),
-            0,
-            ptr::null(),
-            0,
+            None,
         );
 
         let unit_metadata = llvm::LLVMRustDIBuilderCreateCompileUnit(
@@ -972,7 +972,7 @@ fn build_field_di_node<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     owner: &'ll DIScope,
     name: &str,
-    size_and_align: (Size, Align),
+    layout: TyAndLayout<'tcx>,
     offset: Size,
     flags: DIFlags,
     type_di_node: &'ll DIType,
@@ -984,6 +984,30 @@ fn build_field_di_node<'ll, 'tcx>(
     } else {
         (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER)
     };
+    create_member_type(
+        cx,
+        owner,
+        name,
+        file_metadata,
+        line_number,
+        layout,
+        offset,
+        flags,
+        type_di_node,
+    )
+}
+
+fn create_member_type<'ll, 'tcx>(
+    cx: &CodegenCx<'ll, 'tcx>,
+    owner: &'ll DIScope,
+    name: &str,
+    file_metadata: &'ll DIType,
+    line_number: u32,
+    layout: TyAndLayout<'tcx>,
+    offset: Size,
+    flags: DIFlags,
+    type_di_node: &'ll DIType,
+) -> &'ll DIType {
     unsafe {
         llvm::LLVMRustDIBuilderCreateMemberType(
             DIB(cx),
@@ -992,8 +1016,8 @@ fn build_field_di_node<'ll, 'tcx>(
             name.len(),
             file_metadata,
             line_number,
-            size_and_align.0.bits(),
-            size_and_align.1.bits() as u32,
+            layout.size.bits(),
+            layout.align.abi.bits() as u32,
             offset.bits(),
             flags,
             type_di_node,
@@ -1077,7 +1101,7 @@ fn build_struct_type_di_node<'ll, 'tcx>(
                         cx,
                         owner,
                         &field_name[..],
-                        (field_layout.size, field_layout.align.abi),
+                        field_layout,
                         struct_type_and_layout.fields.offset(i),
                         visibility_di_flags(cx, f.did, adt_def.did()),
                         type_di_node(cx, field_layout.ty),
@@ -1127,7 +1151,7 @@ fn build_upvar_field_di_nodes<'ll, 'tcx>(
                 cx,
                 closure_or_coroutine_di_node,
                 capture_name.as_str(),
-                cx.size_and_align_of(up_var_ty),
+                cx.layout_of(up_var_ty),
                 layout.fields.offset(index),
                 DIFlags::FlagZero,
                 type_di_node(cx, up_var_ty),
@@ -1172,7 +1196,7 @@ fn build_tuple_type_di_node<'ll, 'tcx>(
                         cx,
                         tuple_di_node,
                         &tuple_field_name(index),
-                        cx.size_and_align_of(component_type),
+                        cx.layout_of(component_type),
                         tuple_type_and_layout.fields.offset(index),
                         DIFlags::FlagZero,
                         type_di_node(cx, component_type),
@@ -1270,7 +1294,7 @@ fn build_union_type_di_node<'ll, 'tcx>(
                         cx,
                         owner,
                         f.name.as_str(),
-                        size_and_align_of(field_layout),
+                        field_layout,
                         Size::ZERO,
                         DIFlags::FlagZero,
                         type_di_node(cx, field_layout.ty),
@@ -1288,32 +1312,33 @@ fn build_union_type_di_node<'ll, 'tcx>(
 fn build_generic_type_param_di_nodes<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     ty: Ty<'tcx>,
-) -> SmallVec<&'ll DIType> {
+) -> SmallVec<Option<&'ll DIType>> {
     if let ty::Adt(def, args) = *ty.kind() {
-        if args.types().next().is_some() {
-            let generics = cx.tcx.generics_of(def.did());
-            let names = get_parameter_names(cx, generics);
-            let template_params: SmallVec<_> = iter::zip(args, names)
-                .filter_map(|(kind, name)| {
-                    kind.as_type().map(|ty| {
-                        let actual_type = cx.tcx.normalize_erasing_regions(cx.typing_env(), ty);
-                        let actual_type_di_node = type_di_node(cx, actual_type);
-                        let name = name.as_str();
-                        unsafe {
-                            llvm::LLVMRustDIBuilderCreateTemplateTypeParameter(
-                                DIB(cx),
-                                None,
-                                name.as_c_char_ptr(),
-                                name.len(),
-                                actual_type_di_node,
-                            )
-                        }
-                    })
+        let generics = cx.tcx.generics_of(def.did());
+        return get_template_parameters(cx, generics, args);
+    }
+
+    return smallvec![];
+}
+
+pub(super) fn get_template_parameters<'ll, 'tcx>(
+    cx: &CodegenCx<'ll, 'tcx>,
+    generics: &ty::Generics,
+    args: ty::GenericArgsRef<'tcx>,
+) -> SmallVec<Option<&'ll DIType>> {
+    if args.types().next().is_some() {
+        let names = get_parameter_names(cx, generics);
+        let template_params: SmallVec<_> = iter::zip(args, names)
+            .filter_map(|(kind, name)| {
+                kind.as_type().map(|ty| {
+                    let actual_type = cx.tcx.normalize_erasing_regions(cx.typing_env(), ty);
+                    let actual_type_di_node = type_di_node(cx, actual_type);
+                    Some(cx.create_template_type_parameter(name.as_str(), actual_type_di_node))
                 })
-                .collect();
+            })
+            .collect();
 
-            return template_params;
-        }
+        return template_params;
     }
 
     return smallvec![];
@@ -1417,7 +1442,9 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
     let void_pointer_ty = Ty::new_imm_ptr(tcx, tcx.types.unit);
     let void_pointer_type_di_node = type_di_node(cx, void_pointer_ty);
     let usize_di_node = type_di_node(cx, tcx.types.usize);
-    let (pointer_size, pointer_align) = cx.size_and_align_of(void_pointer_ty);
+    let pointer_layout = cx.layout_of(void_pointer_ty);
+    let pointer_size = pointer_layout.size;
+    let pointer_align = pointer_layout.align.abi;
     // If `usize` is not pointer-sized and -aligned then the size and alignment computations
     // for the vtable as a whole would be wrong. Let's make sure this holds even on weird
     // platforms.
@@ -1473,7 +1500,7 @@ fn build_vtable_type_di_node<'ll, 'tcx>(
                         cx,
                         vtable_type_di_node,
                         &field_name,
-                        (pointer_size, pointer_align),
+                        pointer_layout,
                         field_offset,
                         DIFlags::FlagZero,
                         field_type_di_node,
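Several hunks above replace ad-hoc `(Size, Align)` tuples with the field's full layout. A tiny sketch of that API shape, using a made-up `Layout` struct rather than rustc's real `TyAndLayout`:

#[derive(Clone, Copy)]
struct Layout { size_bits: u64, align_bits: u32 }

// Before: build_field(name, (size, align), offset)
// After: the callee receives the whole layout and extracts what it needs.
fn build_field(name: &str, layout: Layout, offset_bits: u64) {
    println!("{name}: size={} align={} offset={}", layout.size_bits, layout.align_bits, offset_bits);
}

fn main() {
    let field_layout = Layout { size_bits: 64, align_bits: 64 };
    build_field("data_ptr", field_layout, 0);
}
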
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
index a72e205c9b2..07075be55fa 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/cpp_like.rs
@@ -17,8 +17,8 @@ use crate::debuginfo::metadata::enums::DiscrResult;
 use crate::debuginfo::metadata::type_map::{self, Stub, UniqueTypeId};
 use crate::debuginfo::metadata::{
     DINodeCreationResult, NO_GENERICS, NO_SCOPE_METADATA, SmallVec, UNKNOWN_LINE_NUMBER,
-    build_field_di_node, file_metadata, file_metadata_from_def_id, size_and_align_of, type_di_node,
-    unknown_file_metadata, visibility_di_flags,
+    build_field_di_node, create_member_type, file_metadata, file_metadata_from_def_id,
+    size_and_align_of, type_di_node, unknown_file_metadata, visibility_di_flags,
 };
 use crate::debuginfo::utils::DIB;
 use crate::llvm::debuginfo::{DIFile, DIFlags, DIType};
@@ -370,9 +370,9 @@ fn build_single_variant_union_fields<'ll, 'tcx>(
             cx,
             enum_type_di_node,
             &variant_union_field_name(variant_index),
-            // NOTE: We use the size and align of the entire type, not from variant_layout
+            // NOTE: We use the layout of the entire type, not that of variant_layout,
+            //       since the latter is sometimes smaller (if it has fewer fields).
-            size_and_align_of(enum_type_and_layout),
+            enum_type_and_layout,
             Size::ZERO,
             visibility_flags,
             variant_struct_type_wrapper_di_node,
@@ -560,7 +560,7 @@ fn build_variant_struct_wrapper_type_di_node<'ll, 'tcx>(
                 cx,
                 wrapper_struct_type_di_node,
                 "value",
-                size_and_align_of(enum_or_coroutine_type_and_layout),
+                enum_or_coroutine_type_and_layout,
                 Size::ZERO,
                 DIFlags::FlagZero,
                 variant_struct_type_di_node,
@@ -820,7 +820,6 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             .unwrap_or_else(|| (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER));
 
         let field_name = variant_union_field_name(variant_member_info.variant_index);
-        let (size, align) = size_and_align_of(enum_type_and_layout);
 
         let variant_struct_type_wrapper = build_variant_struct_wrapper_type_di_node(
             cx,
@@ -840,27 +839,23 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             },
         );
 
-        // We use LLVMRustDIBuilderCreateMemberType() member type directly because
+        // We call create_member_type() directly because
         // the build_field_di_node() function does not support specifying a source location,
         // which is something that we don't do anywhere else.
-        unsafe {
-            llvm::LLVMRustDIBuilderCreateMemberType(
-                DIB(cx),
-                enum_type_di_node,
-                field_name.as_c_char_ptr(),
-                field_name.len(),
-                file_di_node,
-                line_number,
-                // NOTE: We use the size and align of the entire type, not from variant_layout
-                //       since the later is sometimes smaller (if it has fewer fields).
-                size.bits(),
-                align.bits() as u32,
-                // Union fields are always at offset zero
-                Size::ZERO.bits(),
-                di_flags,
-                variant_struct_type_wrapper,
-            )
-        }
+        create_member_type(
+            cx,
+            enum_type_di_node,
+            &field_name,
+            file_di_node,
+            line_number,
+            // NOTE: We use the layout of the entire type, not that of variant_layout,
+            //       since the latter is sometimes smaller (if it has fewer fields).
+            enum_type_and_layout,
+            // Union fields are always at offset zero
+            Size::ZERO,
+            di_flags,
+            variant_struct_type_wrapper,
+        )
     }));
 
     assert_eq!(
@@ -874,7 +869,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
 
     if is_128_bits {
         let type_di_node = type_di_node(cx, cx.tcx.types.u64);
-        let size_and_align = cx.size_and_align_of(cx.tcx.types.u64);
+        let u64_layout = cx.layout_of(cx.tcx.types.u64);
 
         let (lo_offset, hi_offset) = match cx.tcx.data_layout.endian {
             Endian::Little => (0, 8),
@@ -889,7 +884,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             cx,
             enum_type_di_node,
             TAG_FIELD_NAME_128_LO,
-            size_and_align,
+            u64_layout,
             lo_offset,
             di_flags,
             type_di_node,
@@ -900,7 +895,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             cx,
             enum_type_di_node,
             TAG_FIELD_NAME_128_HI,
-            size_and_align,
+            u64_layout,
             hi_offset,
             DIFlags::FlagZero,
             type_di_node,
@@ -911,7 +906,7 @@ fn build_union_fields_for_direct_tag_enum_or_coroutine<'ll, 'tcx>(
             cx,
             enum_type_di_node,
             TAG_FIELD_NAME,
-            cx.size_and_align_of(enum_type_and_layout.field(cx, tag_field).ty),
+            enum_type_and_layout.field(cx, tag_field),
             enum_type_and_layout.fields.offset(tag_field),
             di_flags,
             tag_base_type_di_node,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
index 9f6a5cc89e0..6792c307fdc 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/mod.rs
@@ -249,7 +249,7 @@ fn build_enum_variant_struct_type_di_node<'ll, 'tcx>(
                         cx,
                         struct_type_di_node,
                         &field_name,
-                        (field_layout.size, field_layout.align.abi),
+                        field_layout,
                         variant_layout.fields.offset(field_index),
                         di_flags,
                         type_di_node(cx, field_layout.ty),
@@ -332,7 +332,7 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
                         cx,
                         variant_struct_type_di_node,
                         &field_name,
-                        cx.size_and_align_of(field_type),
+                        cx.layout_of(field_type),
                         variant_layout.fields.offset(field_index),
                         DIFlags::FlagZero,
                         type_di_node(cx, field_type),
@@ -352,7 +352,7 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
                         cx,
                         variant_struct_type_di_node,
                         upvar_name.as_str(),
-                        cx.size_and_align_of(upvar_ty),
+                        cx.layout_of(upvar_ty),
                         coroutine_type_and_layout.fields.offset(index),
                         DIFlags::FlagZero,
                         type_di_node(cx, upvar_ty),
@@ -363,6 +363,7 @@ fn build_coroutine_variant_struct_type_di_node<'ll, 'tcx>(
 
             state_specific_fields.into_iter().chain(common_fields).collect()
         },
+        // FIXME: this is a no-op. `build_generic_type_param_di_nodes` only works for Adts.
         |cx| build_generic_type_param_di_nodes(cx, coroutine_type_and_layout.ty),
     )
     .di_node
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
index 11824398f24..bfd131cfd3d 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/enums/native.rs
@@ -13,9 +13,9 @@ use smallvec::smallvec;
 use crate::common::{AsCCharPtr, CodegenCx};
 use crate::debuginfo::metadata::type_map::{self, Stub, StubInfo, UniqueTypeId};
 use crate::debuginfo::metadata::{
-    DINodeCreationResult, NO_GENERICS, SmallVec, UNKNOWN_LINE_NUMBER, file_metadata,
-    file_metadata_from_def_id, size_and_align_of, type_di_node, unknown_file_metadata,
-    visibility_di_flags,
+    DINodeCreationResult, NO_GENERICS, SmallVec, UNKNOWN_LINE_NUMBER, create_member_type,
+    file_metadata, file_metadata_from_def_id, size_and_align_of, type_di_node,
+    unknown_file_metadata, visibility_di_flags,
 };
 use crate::debuginfo::utils::{DIB, create_DIArray, get_namespace_for_item};
 use crate::llvm::debuginfo::{DIFile, DIFlags, DIType};
@@ -363,23 +363,22 @@ fn build_discr_member_di_node<'ll, 'tcx>(
 
         &Variants::Multiple { tag_field, .. } => {
             let tag_base_type = tag_base_type(cx.tcx, enum_or_coroutine_type_and_layout);
-            let (size, align) = cx.size_and_align_of(tag_base_type);
-
-            unsafe {
-                Some(llvm::LLVMRustDIBuilderCreateMemberType(
-                    DIB(cx),
-                    containing_scope,
-                    tag_name.as_c_char_ptr(),
-                    tag_name.len(),
-                    unknown_file_metadata(cx),
-                    UNKNOWN_LINE_NUMBER,
-                    size.bits(),
-                    align.bits() as u32,
-                    enum_or_coroutine_type_and_layout.fields.offset(tag_field).bits(),
-                    DIFlags::FlagArtificial,
-                    type_di_node(cx, tag_base_type),
-                ))
-            }
+            let ty = type_di_node(cx, tag_base_type);
+            let file = unknown_file_metadata(cx);
+
+            let layout = cx.layout_of(tag_base_type);
+
+            Some(create_member_type(
+                cx,
+                containing_scope,
+                &tag_name,
+                file,
+                UNKNOWN_LINE_NUMBER,
+                layout,
+                enum_or_coroutine_type_and_layout.fields.offset(tag_field),
+                DIFlags::FlagArtificial,
+                ty,
+            ))
         }
     }
 }
@@ -437,6 +436,12 @@ fn build_enum_variant_member_di_node<'ll, 'tcx>(
         .source_info
         .unwrap_or_else(|| (unknown_file_metadata(cx), UNKNOWN_LINE_NUMBER));
 
+    let discr = discr_value.opt_single_val().map(|value| {
+        let tag_base_type = tag_base_type(cx.tcx, enum_type_and_layout);
+        let size = cx.size_of(tag_base_type);
+        cx.const_uint_big(cx.type_ix(size.bits()), value)
+    });
+
     unsafe {
         llvm::LLVMRustDIBuilderCreateVariantMemberType(
             DIB(cx),
@@ -448,7 +453,7 @@ fn build_enum_variant_member_di_node<'ll, 'tcx>(
             enum_type_and_layout.size.bits(),
             enum_type_and_layout.align.abi.bits() as u32,
             Size::ZERO.bits(),
-            discr_value.opt_single_val().map(|value| cx.const_u128(value)),
+            discr,
             DIFlags::FlagZero,
             variant_member_info.variant_struct_type_di_node,
         )
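The discriminant change above stops emitting every variant discriminant as a 128-bit constant and instead sizes it to the tag type. A standalone sketch of the width-limiting step, written as plain arithmetic rather than the real constant-building API:

fn discr_at_tag_width(value: u128, tag_bits: u32) -> u128 {
    // Keep only the low `tag_bits` bits, mirroring a constant built with
    // an integer type of exactly that width.
    if tag_bits >= 128 { value } else { value & ((1u128 << tag_bits) - 1) }
}

fn main() {
    assert_eq!(discr_at_tag_width(0x1_0000_00FFu128, 8), 0xFF);
    assert_eq!(discr_at_tag_width(300, 16), 300);
}
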
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
index af1d503ad6a..ae2ab32ef53 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs
@@ -257,7 +257,7 @@ pub(super) fn build_type_with_children<'ll, 'tcx>(
     cx: &CodegenCx<'ll, 'tcx>,
     stub_info: StubInfo<'ll, 'tcx>,
     members: impl FnOnce(&CodegenCx<'ll, 'tcx>, &'ll DIType) -> SmallVec<&'ll DIType>,
-    generics: impl FnOnce(&CodegenCx<'ll, 'tcx>) -> SmallVec<&'ll DIType>,
+    generics: impl FnOnce(&CodegenCx<'ll, 'tcx>) -> SmallVec<Option<&'ll DIType>>,
 ) -> DINodeCreationResult<'ll> {
     assert_eq!(debug_context(cx).type_map.di_node_for_unique_id(stub_info.unique_type_id), None);
 
@@ -265,8 +265,7 @@ pub(super) fn build_type_with_children<'ll, 'tcx>(
 
     let members: SmallVec<_> =
         members(cx, stub_info.metadata).into_iter().map(|node| Some(node)).collect();
-    let generics: SmallVec<Option<&'ll DIType>> =
-        generics(cx).into_iter().map(|node| Some(node)).collect();
+    let generics = generics(cx);
 
     if !(members.is_empty() && generics.is_empty()) {
         unsafe {
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index 496178c6b1d..ae7d080db66 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -2,15 +2,16 @@
 
 use std::cell::{OnceCell, RefCell};
 use std::ops::Range;
-use std::{iter, ptr};
+use std::ptr;
+use std::sync::Arc;
 
 use libc::c_uint;
+use metadata::create_subroutine_type;
 use rustc_abi::Size;
 use rustc_codegen_ssa::debuginfo::type_names;
 use rustc_codegen_ssa::mir::debuginfo::VariableKind::*;
 use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext, VariableKind};
 use rustc_codegen_ssa::traits::*;
-use rustc_data_structures::sync::Lrc;
 use rustc_data_structures::unord::UnordMap;
 use rustc_hir::def_id::{DefId, DefIdMap};
 use rustc_index::IndexVec;
@@ -22,6 +23,7 @@ use rustc_session::config::{self, DebugInfo};
 use rustc_span::{
     BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId, Symbol,
 };
+use rustc_target::callconv::FnAbi;
 use rustc_target::spec::DebuginfoKind;
 use smallvec::SmallVec;
 use tracing::debug;
@@ -29,13 +31,12 @@ use tracing::debug;
 use self::metadata::{UNKNOWN_COLUMN_NUMBER, UNKNOWN_LINE_NUMBER, file_metadata, type_di_node};
 use self::namespace::mangled_name_of_instance;
 use self::utils::{DIB, create_DIArray, is_node_local_to_unit};
-use crate::abi::FnAbi;
 use crate::builder::Builder;
 use crate::common::{AsCCharPtr, CodegenCx};
 use crate::llvm;
 use crate::llvm::debuginfo::{
-    DIArray, DIBuilderBox, DIFile, DIFlags, DILexicalBlock, DILocation, DISPFlags, DIScope, DIType,
-    DIVariable,
+    DIArray, DIBuilderBox, DIFile, DIFlags, DILexicalBlock, DILocation, DISPFlags, DIScope,
+    DITemplateTypeParameter, DIType, DIVariable,
 };
 use crate::value::Value;
 
@@ -97,7 +98,11 @@ impl<'ll, 'tcx> CodegenUnitDebugContext<'ll, 'tcx> {
                 // Android has the same issue (#22398)
                 llvm::add_module_flag_u32(
                     self.llmod,
-                    llvm::ModuleFlagMergeBehavior::Warning,
+                    // In the case where multiple CGUs with different dwarf version
+                    // values are being merged together, such as with cross-crate
+                    // LTO, we want to use the highest version of dwarf
+                    // we can. This matches Clang's behavior as well.
+                    llvm::ModuleFlagMergeBehavior::Max,
                     "Dwarf Version",
                     sess.dwarf_version(),
                 );
@@ -240,14 +245,14 @@ impl<'ll> DebugInfoBuilderMethods for Builder<'_, 'll, '_> {
 // `lookup_char_pos` return the right information instead.
 struct DebugLoc {
     /// Information about the original source file.
-    file: Lrc<SourceFile>,
+    file: Arc<SourceFile>,
     /// The (1-based) line number.
     line: u32,
     /// The (1-based) column number.
     col: u32,
 }
 
-impl CodegenCx<'_, '_> {
+impl<'ll> CodegenCx<'ll, '_> {
     /// Looks up debug source information about a `BytePos`.
     // FIXME(eddyb) rename this to better indicate it's a duplicate of
     // `lookup_char_pos` rather than `dbg_loc`, perhaps by making
@@ -275,6 +280,22 @@ impl CodegenCx<'_, '_> {
             DebugLoc { file, line, col }
         }
     }
+
+    fn create_template_type_parameter(
+        &self,
+        name: &str,
+        actual_type_metadata: &'ll DIType,
+    ) -> &'ll DITemplateTypeParameter {
+        unsafe {
+            llvm::LLVMRustDIBuilderCreateTemplateTypeParameter(
+                DIB(self),
+                None,
+                name.as_c_char_ptr(),
+                name.len(),
+                actual_type_metadata,
+            )
+        }
+    }
 }
 
 impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
@@ -321,10 +342,8 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         let loc = self.lookup_debug_loc(span.lo());
         let file_metadata = file_metadata(self, &loc.file);
 
-        let function_type_metadata = unsafe {
-            let fn_signature = get_function_signature(self, fn_abi);
-            llvm::LLVMRustDIBuilderCreateSubroutineType(DIB(self), fn_signature)
-        };
+        let function_type_metadata =
+            create_subroutine_type(self, get_function_signature(self, fn_abi));
 
         let mut name = String::with_capacity(64);
         type_names::push_item_name(tcx, def_id, false, &mut name);
@@ -467,46 +486,10 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
             generics: &ty::Generics,
             args: GenericArgsRef<'tcx>,
         ) -> &'ll DIArray {
-            if args.types().next().is_none() {
-                return create_DIArray(DIB(cx), &[]);
-            }
-
-            // Again, only create type information if full debuginfo is enabled
-            let template_params: Vec<_> = if cx.sess().opts.debuginfo == DebugInfo::Full {
-                let names = get_parameter_names(cx, generics);
-                iter::zip(args, names)
-                    .filter_map(|(kind, name)| {
-                        kind.as_type().map(|ty| {
-                            let actual_type = cx.tcx.normalize_erasing_regions(cx.typing_env(), ty);
-                            let actual_type_metadata = type_di_node(cx, actual_type);
-                            let name = name.as_str();
-                            unsafe {
-                                Some(llvm::LLVMRustDIBuilderCreateTemplateTypeParameter(
-                                    DIB(cx),
-                                    None,
-                                    name.as_c_char_ptr(),
-                                    name.len(),
-                                    actual_type_metadata,
-                                ))
-                            }
-                        })
-                    })
-                    .collect()
-            } else {
-                vec![]
-            };
-
+            let template_params = metadata::get_template_parameters(cx, generics, args);
             create_DIArray(DIB(cx), &template_params)
         }
 
-        fn get_parameter_names(cx: &CodegenCx<'_, '_>, generics: &ty::Generics) -> Vec<Symbol> {
-            let mut names = generics.parent.map_or_else(Vec::new, |def_id| {
-                get_parameter_names(cx, cx.tcx.generics_of(def_id))
-            });
-            names.extend(generics.own_params.iter().map(|param| param.name));
-            names
-        }
-
         /// Returns a scope, plus `true` if that's a type scope for "class" methods,
         /// otherwise `false` for plain namespace scopes.
         fn get_containing_scope<'ll, 'tcx>(
@@ -544,14 +527,17 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 }
             }
 
-            let scope = namespace::item_namespace(cx, DefId {
-                krate: instance.def_id().krate,
-                index: cx
-                    .tcx
-                    .def_key(instance.def_id())
-                    .parent
-                    .expect("get_containing_scope: missing parent?"),
-            });
+            let scope = namespace::item_namespace(
+                cx,
+                DefId {
+                    krate: instance.def_id().krate,
+                    index: cx
+                        .tcx
+                        .def_key(instance.def_id())
+                        .parent
+                        .expect("get_containing_scope: missing parent?"),
+                },
+            );
             (scope, false)
         }
     }
@@ -633,7 +619,7 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 true,
                 DIFlags::FlagZero,
                 argument_index,
-                align.bytes() as u32,
+                align.bits() as u32,
             )
         }
     }
diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs
index bdc83267cca..2419ec1f888 100644
--- a/compiler/rustc_codegen_llvm/src/declare.rs
+++ b/compiler/rustc_codegen_llvm/src/declare.rs
@@ -11,17 +11,20 @@
 //! * Use define_* family of methods when you might be defining the Value.
 //! * When in doubt, define.
 
+use std::borrow::Borrow;
+
 use itertools::Itertools;
 use rustc_codegen_ssa::traits::TypeMembershipCodegenMethods;
 use rustc_data_structures::fx::FxIndexSet;
 use rustc_middle::ty::{Instance, Ty};
 use rustc_sanitizers::{cfi, kcfi};
+use rustc_target::callconv::FnAbi;
 use smallvec::SmallVec;
 use tracing::debug;
 
-use crate::abi::{FnAbi, FnAbiLlvmExt};
+use crate::abi::FnAbiLlvmExt;
 use crate::common::AsCCharPtr;
-use crate::context::{CodegenCx, SimpleCx};
+use crate::context::{CodegenCx, GenericCx, SCx, SimpleCx};
 use crate::llvm::AttributePlace::Function;
 use crate::llvm::Visibility;
 use crate::type_::Type;
@@ -80,16 +83,25 @@ pub(crate) fn declare_raw_fn<'ll, 'tcx>(
     llfn
 }
 
-impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
+impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
     /// Declare a global value.
     ///
     /// If there’s a value with the same name already declared, the function will
     /// return its Value instead.
     pub(crate) fn declare_global(&self, name: &str, ty: &'ll Type) -> &'ll Value {
         debug!("declare_global(name={:?})", name);
-        unsafe { llvm::LLVMRustGetOrInsertGlobal(self.llmod, name.as_c_char_ptr(), name.len(), ty) }
+        unsafe {
+            llvm::LLVMRustGetOrInsertGlobal(
+                (**self).borrow().llmod,
+                name.as_c_char_ptr(),
+                name.len(),
+                ty,
+            )
+        }
     }
+}
 
+impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     /// Declare a C ABI function.
     ///
     /// Only use this for foreign function ABIs and glue. For Rust functions use
@@ -235,7 +247,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
     /// name.
     pub(crate) fn get_defined_value(&self, name: &str) -> Option<&'ll Value> {
         self.get_declared_value(name).and_then(|val| {
-            let declaration = unsafe { llvm::LLVMIsDeclaration(val) != 0 };
+            let declaration = llvm::is_declaration(val);
             if !declaration { Some(val) } else { None }
         })
     }
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index f4c9491f758..4c5a78ca74f 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -92,10 +92,13 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
 
 #[derive(Diagnostic)]
 #[diag(codegen_llvm_autodiff_without_lto)]
-#[note]
 pub(crate) struct AutoDiffWithoutLTO;
 
 #[derive(Diagnostic)]
+#[diag(codegen_llvm_autodiff_without_enable)]
+pub(crate) struct AutoDiffWithoutEnable;
+
+#[derive(Diagnostic)]
 #[diag(codegen_llvm_lto_disallowed)]
 pub(crate) struct LtoDisallowed;
 
@@ -131,8 +134,6 @@ pub enum LlvmError<'a> {
     LoadBitcode { name: CString },
     #[diag(codegen_llvm_write_thinlto_key)]
     WriteThinLtoKey { err: std::io::Error },
-    #[diag(codegen_llvm_multiple_source_dicompileunit)]
-    MultipleSourceDiCompileUnit,
     #[diag(codegen_llvm_prepare_thin_lto_module)]
     PrepareThinLtoModule,
     #[diag(codegen_llvm_parse_bitcode)]
@@ -155,9 +156,6 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for WithLlvmError<'_> {
             PrepareThinLtoContext => fluent::codegen_llvm_prepare_thin_lto_context_with_llvm_err,
             LoadBitcode { .. } => fluent::codegen_llvm_load_bitcode_with_llvm_err,
             WriteThinLtoKey { .. } => fluent::codegen_llvm_write_thinlto_key_with_llvm_err,
-            MultipleSourceDiCompileUnit => {
-                fluent::codegen_llvm_multiple_source_dicompileunit_with_llvm_err
-            }
             PrepareThinLtoModule => fluent::codegen_llvm_prepare_thin_lto_module_with_llvm_err,
             ParseBitcode => fluent::codegen_llvm_parse_bitcode_with_llvm_err,
             PrepareAutoDiff { .. } => fluent::codegen_llvm_prepare_autodiff_with_llvm_err,
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 43d6ccfcb4a..67135fcc308 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -1,7 +1,7 @@
 use std::assert_matches::assert_matches;
 use std::cmp::Ordering;
 
-use rustc_abi::{self as abi, Align, Float, HasDataLayout, Primitive, Size};
+use rustc_abi::{Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size};
 use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh};
 use rustc_codegen_ssa::common::{IntPredicate, TypeKind};
 use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization};
@@ -14,10 +14,12 @@ use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf};
 use rustc_middle::ty::{self, GenericArgsRef, Ty};
 use rustc_middle::{bug, span_bug};
 use rustc_span::{Span, Symbol, sym};
+use rustc_symbol_mangling::mangle_internal_symbol;
+use rustc_target::callconv::{FnAbi, PassMode};
 use rustc_target::spec::{HasTargetSpec, PanicStrategy};
 use tracing::debug;
 
-use crate::abi::{ExternAbi, FnAbi, FnAbiLlvmExt, LlvmType, PassMode};
+use crate::abi::{FnAbiLlvmExt, LlvmType};
 use crate::builder::Builder;
 use crate::context::CodegenCx;
 use crate::llvm::{self, Metadata};
@@ -126,15 +128,14 @@ fn get_simple_intrinsic<'ll>(
         sym::truncf64 => "llvm.trunc.f64",
         sym::truncf128 => "llvm.trunc.f128",
 
-        sym::rintf16 => "llvm.rint.f16",
-        sym::rintf32 => "llvm.rint.f32",
-        sym::rintf64 => "llvm.rint.f64",
-        sym::rintf128 => "llvm.rint.f128",
-
-        sym::nearbyintf16 => "llvm.nearbyint.f16",
-        sym::nearbyintf32 => "llvm.nearbyint.f32",
-        sym::nearbyintf64 => "llvm.nearbyint.f64",
-        sym::nearbyintf128 => "llvm.nearbyint.f128",
+        // We could use any of `rint`, `nearbyint`, or `roundeven`
+        // for this -- they are all identical in semantics when
+        // assuming the default FP environment.
+        // `rint` is what we used for $forever.
+        sym::round_ties_even_f16 => "llvm.rint.f16",
+        sym::round_ties_even_f32 => "llvm.rint.f32",
+        sym::round_ties_even_f64 => "llvm.rint.f64",
+        sym::round_ties_even_f128 => "llvm.rint.f128",
 
         sym::roundf16 => "llvm.round.f16",
         sym::roundf32 => "llvm.round.f32",
@@ -143,11 +144,6 @@ fn get_simple_intrinsic<'ll>(
 
         sym::ptr_mask => "llvm.ptrmask",
 
-        sym::roundevenf16 => "llvm.roundeven.f16",
-        sym::roundevenf32 => "llvm.roundeven.f32",
-        sym::roundevenf64 => "llvm.roundeven.f64",
-        sym::roundevenf128 => "llvm.roundeven.f128",
-
         _ => return None,
     };
     Some(cx.get_intrinsic(llvm_name))
@@ -257,7 +253,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             }
             sym::va_arg => {
                 match fn_abi.ret.layout.backend_repr {
-                    abi::BackendRepr::Scalar(scalar) => {
+                    BackendRepr::Scalar(scalar) => {
                         match scalar.primitive() {
                             Primitive::Int(..) => {
                                 if self.cx().size_of(ret_ty).bytes() < 4 {
@@ -333,12 +329,15 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     sym::prefetch_write_instruction => (1, 0),
                     _ => bug!(),
                 };
-                self.call_intrinsic("llvm.prefetch", &[
-                    args[0].immediate(),
-                    self.const_i32(rw),
-                    args[1].immediate(),
-                    self.const_i32(cache_type),
-                ])
+                self.call_intrinsic(
+                    "llvm.prefetch",
+                    &[
+                        args[0].immediate(),
+                        self.const_i32(rw),
+                        args[1].immediate(),
+                        self.const_i32(cache_type),
+                    ],
+                )
             }
             sym::carrying_mul_add => {
                 let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);
@@ -396,10 +395,10 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 match name {
                     sym::ctlz | sym::cttz => {
                         let y = self.const_bool(false);
-                        let ret = self.call_intrinsic(&format!("llvm.{name}.i{width}"), &[
-                            args[0].immediate(),
-                            y,
-                        ]);
+                        let ret = self.call_intrinsic(
+                            &format!("llvm.{name}.i{width}"),
+                            &[args[0].immediate(), y],
+                        );
 
                         self.intcast(ret, llret_ty, false)
                     }
@@ -416,24 +415,26 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                         self.intcast(ret, llret_ty, false)
                     }
                     sym::ctpop => {
-                        let ret = self.call_intrinsic(&format!("llvm.ctpop.i{width}"), &[
-                            args[0].immediate()
-                        ]);
+                        let ret = self.call_intrinsic(
+                            &format!("llvm.ctpop.i{width}"),
+                            &[args[0].immediate()],
+                        );
                         self.intcast(ret, llret_ty, false)
                     }
                     sym::bswap => {
                         if width == 8 {
                             args[0].immediate() // byte swap a u8/i8 is just a no-op
                         } else {
-                            self.call_intrinsic(&format!("llvm.bswap.i{width}"), &[
-                                args[0].immediate()
-                            ])
+                            self.call_intrinsic(
+                                &format!("llvm.bswap.i{width}"),
+                                &[args[0].immediate()],
+                            )
                         }
                     }
-                    sym::bitreverse => self
-                        .call_intrinsic(&format!("llvm.bitreverse.i{width}"), &[
-                            args[0].immediate()
-                        ]),
+                    sym::bitreverse => self.call_intrinsic(
+                        &format!("llvm.bitreverse.i{width}"),
+                        &[args[0].immediate()],
+                    ),
                     sym::rotate_left | sym::rotate_right => {
                         let is_left = name == sym::rotate_left;
                         let val = args[0].immediate();
@@ -465,12 +466,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             }
 
             sym::raw_eq => {
-                use abi::BackendRepr::*;
+                use BackendRepr::*;
                 let tp_ty = fn_args.type_at(0);
                 let layout = self.layout_of(tp_ty).layout;
                 let use_integer_compare = match layout.backend_repr() {
                     Scalar(_) | ScalarPair(_, _) => true,
-                    Uninhabited | Vector { .. } => false,
+                    SimdVector { .. } => false,
                     Memory { .. } => {
                         // For rusty ABIs, small aggregates are actually passed
                         // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
@@ -500,11 +501,10 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
 
             sym::compare_bytes => {
                 // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
-                let cmp = self.call_intrinsic("memcmp", &[
-                    args[0].immediate(),
-                    args[1].immediate(),
-                    args[2].immediate(),
-                ]);
+                let cmp = self.call_intrinsic(
+                    "memcmp",
+                    &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
+                );
                 // Some targets have `memcmp` returning `i16`, but the intrinsic is always `i32`.
                 self.sext(cmp, self.type_ix(32))
             }
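
The `sext` here (rather than a zero extension) matters because `memcmp` reports ordering through the sign of its result; a negative 16-bit return value must stay negative after widening to `i32`. A plain-Rust illustration of the difference, separate from the builder API:

    // Illustration only: widening a 16-bit `memcmp`-style result to 32 bits.
    // Sign extension (what `sext` does) preserves a negative result; going
    // through an unsigned type (zero extension) would not.
    fn widen_cmp(cmp16: i16) -> i32 {
        cmp16 as i32 // sign-extends: -1i16 -> -1i32
    }

    fn main() {
        assert_eq!(widen_cmp(-1), -1);
        assert_eq!((-1i16 as u16) as i32, 65535); // zero extension loses the sign
    }
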
@@ -578,8 +578,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 }
 
                 let llret_ty = if ret_ty.is_simd()
-                    && let abi::BackendRepr::Memory { .. } =
-                        self.layout_of(ret_ty).layout.backend_repr
+                    && let BackendRepr::Memory { .. } = self.layout_of(ret_ty).layout.backend_repr
                 {
                     let (size, elem_ty) = ret_ty.simd_size_and_type(self.tcx());
                     let elem_ll_ty = match elem_ty.kind() {
@@ -651,7 +650,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
     fn type_test(&mut self, pointer: Self::Value, typeid: Self::Metadata) -> Self::Value {
         // Test the called operand using llvm.type.test intrinsic. The LowerTypeTests link-time
         // optimization pass replaces calls to this intrinsic with code to test type membership.
-        let typeid = unsafe { llvm::LLVMMetadataAsValue(&self.llcx, typeid) };
+        let typeid = self.get_metadata_value(typeid);
         self.call_intrinsic("llvm.type.test", &[pointer, typeid])
     }
 
@@ -661,7 +660,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
         vtable_byte_offset: u64,
         typeid: &'ll Metadata,
     ) -> Self::Value {
-        let typeid = unsafe { llvm::LLVMMetadataAsValue(&self.llcx, typeid) };
+        let typeid = self.get_metadata_value(typeid);
         let vtable_byte_offset = self.const_i32(vtable_byte_offset as i32);
         let type_checked_load =
             self.call_intrinsic("llvm.type.checked.load", &[llvtable, vtable_byte_offset, typeid]);
@@ -814,13 +813,16 @@ fn codegen_msvc_try<'ll>(
         let type_name = bx.const_bytes(b"rust_panic\0");
         let type_info =
             bx.const_struct(&[type_info_vtable, bx.const_null(bx.type_ptr()), type_name], false);
-        let tydesc = bx.declare_global("__rust_panic_type_info", bx.val_ty(type_info));
+        let tydesc = bx.declare_global(
+            &mangle_internal_symbol(bx.tcx, "__rust_panic_type_info"),
+            bx.val_ty(type_info),
+        );
 
         llvm::set_linkage(tydesc, llvm::Linkage::LinkOnceODRLinkage);
         if bx.cx.tcx.sess.target.supports_comdat() {
             llvm::SetUniqueComdat(bx.llmod, tydesc);
         }
-        unsafe { llvm::LLVMSetInitializer(tydesc, type_info) };
+        llvm::set_initializer(tydesc, type_info);
 
         // The flag value of 8 indicates that we are catching the exception by
         // reference instead of by value. We can't use catch by value because
@@ -1305,14 +1307,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     if let Some(cmp_op) = comparison {
         let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
 
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
         require!(
             bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
             InvalidMonomorphization::ReturnIntegerType { span, name, ret_ty, out_ty }
@@ -1328,26 +1333,19 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         ));
     }
 
-    if name == sym::simd_shuffle_generic {
+    if name == sym::simd_shuffle_const_generic {
         let idx = fn_args[2].expect_const().to_value().valtree.unwrap_branch();
         let n = idx.len() as u64;
 
         let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
-        require!(out_len == n, InvalidMonomorphization::ReturnLength {
-            span,
-            name,
-            in_len: n,
-            ret_ty,
-            out_len
-        });
-        require!(in_elem == out_ty, InvalidMonomorphization::ReturnElement {
-            span,
-            name,
-            in_elem,
-            in_ty,
-            ret_ty,
-            out_ty
-        });
+        require!(
+            out_len == n,
+            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
+        );
+        require!(
+            in_elem == out_ty,
+            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
+        );
 
         let total_len = in_len * 2;
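
`total_len` is `in_len * 2` because shuffle indices address the concatenation of the two input vectors, so an index in `in_len..2*in_len` selects a lane from the second input. A worked example of that indexing rule in plain Rust, with arrays standing in for SIMD vectors:

    // Shuffle index semantics sketch: indices pick lanes from `a` followed by
    // `b`, so valid indices range over 0..(2 * lane count).
    fn shuffle4(a: [i32; 4], b: [i32; 4], idx: [usize; 4]) -> [i32; 4] {
        let concat = [a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]];
        [concat[idx[0]], concat[idx[1]], concat[idx[2]], concat[idx[3]]]
    }

    fn main() {
        // Index 5 selects lane 1 of `b`; index 7 selects lane 3 of `b`.
        assert_eq!(shuffle4([10, 11, 12, 13], [20, 21, 22, 23], [0, 5, 2, 7]), [10, 21, 12, 23]);
    }
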
 
@@ -1392,21 +1390,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         };
 
         let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn);
-        require!(out_len == n, InvalidMonomorphization::ReturnLength {
-            span,
-            name,
-            in_len: n,
-            ret_ty,
-            out_len
-        });
-        require!(in_elem == out_ty, InvalidMonomorphization::ReturnElement {
-            span,
-            name,
-            in_elem,
-            in_ty,
-            ret_ty,
-            out_ty
-        });
+        require!(
+            out_len == n,
+            InvalidMonomorphization::ReturnLength { span, name, in_len: n, ret_ty, out_len }
+        );
+        require!(
+            in_elem == out_ty,
+            InvalidMonomorphization::ReturnElement { span, name, in_elem, in_ty, ret_ty, out_ty }
+        );
 
         let total_len = u128::from(in_len) * 2;
 
@@ -1431,13 +1422,16 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     }
 
     if name == sym::simd_insert {
-        require!(in_elem == arg_tys[2], InvalidMonomorphization::InsertedType {
-            span,
-            name,
-            in_elem,
-            in_ty,
-            out_ty: arg_tys[2]
-        });
+        require!(
+            in_elem == arg_tys[2],
+            InvalidMonomorphization::InsertedType {
+                span,
+                name,
+                in_elem,
+                in_ty,
+                out_ty: arg_tys[2]
+            }
+        );
         let idx = bx
             .const_to_opt_u128(args[1].immediate(), false)
             .expect("typeck should have ensure that this is a const");
@@ -1456,13 +1450,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         ));
     }
     if name == sym::simd_extract {
-        require!(ret_ty == in_elem, InvalidMonomorphization::ReturnType {
-            span,
-            name,
-            in_elem,
-            in_ty,
-            ret_ty
-        });
+        require!(
+            ret_ty == in_elem,
+            InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
+        );
         let idx = bx
             .const_to_opt_u128(args[1].immediate(), false)
             .expect("typeck should have ensure that this is a const");
@@ -1481,18 +1472,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let m_elem_ty = in_elem;
         let m_len = in_len;
         let (v_len, _) = require_simd!(arg_tys[1], SimdArgument);
-        require!(m_len == v_len, InvalidMonomorphization::MismatchedLengths {
-            span,
-            name,
-            m_len,
-            v_len
-        });
-        let in_elem_bitwidth =
-            require_int_ty!(m_elem_ty.kind(), InvalidMonomorphization::MaskType {
-                span,
-                name,
-                ty: m_elem_ty
-            });
+        require!(
+            m_len == v_len,
+            InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len }
+        );
+        let in_elem_bitwidth = require_int_ty!(
+            m_elem_ty.kind(),
+            InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }
+        );
         let m_i1s = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, m_len);
         return Ok(bx.select(m_i1s, args[1].immediate(), args[2].immediate()));
     }
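
The `select` built at the end of this arm is lane-wise: each mask lane chooses between the corresponding lanes of the two value vectors. The same semantics written as a plain scalar loop, rather than the builder calls:

    // Lane-wise select: a true mask lane takes the value from `if_true`,
    // a false lane takes the value from `if_false`.
    fn select_lanes<const N: usize>(mask: [bool; N], if_true: [i32; N], if_false: [i32; N]) -> [i32; N] {
        let mut out = [0; N];
        for i in 0..N {
            out[i] = if mask[i] { if_true[i] } else { if_false[i] };
        }
        out
    }

    fn main() {
        assert_eq!(select_lanes([true, false, true], [1, 2, 3], [9, 8, 7]), [1, 8, 3]);
    }
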
@@ -1510,13 +1497,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let expected_bytes = in_len.div_ceil(8);
 
         // Integer vector <i{in_bitwidth} x in_len>:
-        let in_elem_bitwidth =
-            require_int_or_uint_ty!(in_elem.kind(), InvalidMonomorphization::VectorArgument {
-                span,
-                name,
-                in_ty,
-                in_elem
-            });
+        let in_elem_bitwidth = require_int_or_uint_ty!(
+            in_elem.kind(),
+            InvalidMonomorphization::VectorArgument { span, name, in_ty, in_elem }
+        );
 
         let i1xn = vector_mask_to_bitmask(bx, args[0].immediate(), in_elem_bitwidth, in_len);
         // Bitcast <i1 x N> to iN:
@@ -1601,8 +1585,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
             sym::simd_relaxed_fma => ("fmuladd", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
-            sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)),
-            sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)),
             sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
             sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
@@ -1635,8 +1617,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             | sym::simd_flog
             | sym::simd_floor
             | sym::simd_fma
-            | sym::simd_fpow
-            | sym::simd_fpowi
             | sym::simd_fsin
             | sym::simd_fsqrt
             | sym::simd_relaxed_fma
@@ -1698,30 +1678,34 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         require_simd!(ret_ty, SimdReturn);
 
         // Of the same length:
-        require!(in_len == out_len, InvalidMonomorphization::SecondArgumentLength {
-            span,
-            name,
-            in_len,
-            in_ty,
-            arg_ty: arg_tys[1],
-            out_len
-        });
-        require!(in_len == out_len2, InvalidMonomorphization::ThirdArgumentLength {
-            span,
-            name,
-            in_len,
-            in_ty,
-            arg_ty: arg_tys[2],
-            out_len: out_len2
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::SecondArgumentLength {
+                span,
+                name,
+                in_len,
+                in_ty,
+                arg_ty: arg_tys[1],
+                out_len
+            }
+        );
+        require!(
+            in_len == out_len2,
+            InvalidMonomorphization::ThirdArgumentLength {
+                span,
+                name,
+                in_len,
+                in_ty,
+                arg_ty: arg_tys[2],
+                out_len: out_len2
+            }
+        );
 
         // The return type must match the first argument type
-        require!(ret_ty == in_ty, InvalidMonomorphization::ExpectedReturnType {
-            span,
-            name,
-            in_ty,
-            ret_ty
-        });
+        require!(
+            ret_ty == in_ty,
+            InvalidMonomorphization::ExpectedReturnType { span, name, in_ty, ret_ty }
+        );
 
         require!(
             matches!(
@@ -1739,13 +1723,15 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let mask_elem_bitwidth =
-            require_int_ty!(element_ty2.kind(), InvalidMonomorphization::ThirdArgElementType {
+        let mask_elem_bitwidth = require_int_ty!(
+            element_ty2.kind(),
+            InvalidMonomorphization::ThirdArgElementType {
                 span,
                 name,
                 expected_element: element_ty2,
                 third_arg: arg_tys[2]
-            });
+            }
+        );
 
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
@@ -1805,22 +1791,23 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         require_simd!(ret_ty, SimdReturn);
 
         // Of the same length:
-        require!(values_len == mask_len, InvalidMonomorphization::ThirdArgumentLength {
-            span,
-            name,
-            in_len: mask_len,
-            in_ty: mask_ty,
-            arg_ty: values_ty,
-            out_len: values_len
-        });
+        require!(
+            values_len == mask_len,
+            InvalidMonomorphization::ThirdArgumentLength {
+                span,
+                name,
+                in_len: mask_len,
+                in_ty: mask_ty,
+                arg_ty: values_ty,
+                out_len: values_len
+            }
+        );
 
         // The return type must match the last argument type
-        require!(ret_ty == values_ty, InvalidMonomorphization::ExpectedReturnType {
-            span,
-            name,
-            in_ty: values_ty,
-            ret_ty
-        });
+        require!(
+            ret_ty == values_ty,
+            InvalidMonomorphization::ExpectedReturnType { span, name, in_ty: values_ty, ret_ty }
+        );
 
         require!(
             matches!(
@@ -1838,13 +1825,15 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let m_elem_bitwidth =
-            require_int_ty!(mask_elem.kind(), InvalidMonomorphization::ThirdArgElementType {
+        let m_elem_bitwidth = require_int_ty!(
+            mask_elem.kind(),
+            InvalidMonomorphization::ThirdArgElementType {
                 span,
                 name,
                 expected_element: values_elem,
                 third_arg: mask_ty,
-            });
+            }
+        );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
         let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
@@ -1896,14 +1885,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let (values_len, values_elem) = require_simd!(values_ty, SimdThird);
 
         // Of the same length:
-        require!(values_len == mask_len, InvalidMonomorphization::ThirdArgumentLength {
-            span,
-            name,
-            in_len: mask_len,
-            in_ty: mask_ty,
-            arg_ty: values_ty,
-            out_len: values_len
-        });
+        require!(
+            values_len == mask_len,
+            InvalidMonomorphization::ThirdArgumentLength {
+                span,
+                name,
+                in_len: mask_len,
+                in_ty: mask_ty,
+                arg_ty: values_ty,
+                out_len: values_len
+            }
+        );
 
         // The second argument must be a mutable pointer type matching the element type
         require!(
@@ -1923,13 +1915,15 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }
         );
 
-        let m_elem_bitwidth =
-            require_int_ty!(mask_elem.kind(), InvalidMonomorphization::ThirdArgElementType {
+        let m_elem_bitwidth = require_int_ty!(
+            mask_elem.kind(),
+            InvalidMonomorphization::ThirdArgElementType {
                 span,
                 name,
                 expected_element: values_elem,
                 third_arg: mask_ty,
-            });
+            }
+        );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
         let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
@@ -1976,22 +1970,28 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let (element_len2, element_ty2) = require_simd!(arg_tys[2], SimdThird);
 
         // Of the same length:
-        require!(in_len == element_len1, InvalidMonomorphization::SecondArgumentLength {
-            span,
-            name,
-            in_len,
-            in_ty,
-            arg_ty: arg_tys[1],
-            out_len: element_len1
-        });
-        require!(in_len == element_len2, InvalidMonomorphization::ThirdArgumentLength {
-            span,
-            name,
-            in_len,
-            in_ty,
-            arg_ty: arg_tys[2],
-            out_len: element_len2
-        });
+        require!(
+            in_len == element_len1,
+            InvalidMonomorphization::SecondArgumentLength {
+                span,
+                name,
+                in_len,
+                in_ty,
+                arg_ty: arg_tys[1],
+                out_len: element_len1
+            }
+        );
+        require!(
+            in_len == element_len2,
+            InvalidMonomorphization::ThirdArgumentLength {
+                span,
+                name,
+                in_len,
+                in_ty,
+                arg_ty: arg_tys[2],
+                out_len: element_len2
+            }
+        );
 
         require!(
             matches!(
@@ -2011,13 +2011,15 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         // The element type of the third argument must be a signed integer type of any width:
-        let mask_elem_bitwidth =
-            require_int_ty!(element_ty2.kind(), InvalidMonomorphization::ThirdArgElementType {
+        let mask_elem_bitwidth = require_int_ty!(
+            element_ty2.kind(),
+            InvalidMonomorphization::ThirdArgElementType {
                 span,
                 name,
                 expected_element: element_ty2,
                 third_arg: arg_tys[2]
-            });
+            }
+        );
 
         // Alignment of T, must be a constant integer value:
         let alignment_ty = bx.type_i32();
@@ -2058,13 +2060,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         ($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident,
          $identity:expr) => {
             if name == sym::$name {
-                require!(ret_ty == in_elem, InvalidMonomorphization::ReturnType {
-                    span,
-                    name,
-                    in_elem,
-                    in_ty,
-                    ret_ty
-                });
+                require!(
+                    ret_ty == in_elem,
+                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
+                );
                 return match in_elem.kind() {
                     ty::Int(_) | ty::Uint(_) => {
                         let r = bx.$integer_reduce(args[0].immediate());
@@ -2133,13 +2132,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     macro_rules! minmax_red {
         ($name:ident: $int_red:ident, $float_red:ident) => {
             if name == sym::$name {
-                require!(ret_ty == in_elem, InvalidMonomorphization::ReturnType {
-                    span,
-                    name,
-                    in_elem,
-                    in_ty,
-                    ret_ty
-                });
+                require!(
+                    ret_ty == in_elem,
+                    InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
+                );
                 return match in_elem.kind() {
                     ty::Int(_i) => Ok(bx.$int_red(args[0].immediate(), true)),
                     ty::Uint(_u) => Ok(bx.$int_red(args[0].immediate(), false)),
@@ -2164,13 +2160,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         ($name:ident : $red:ident, $boolean:expr) => {
             if name == sym::$name {
                 let input = if !$boolean {
-                    require!(ret_ty == in_elem, InvalidMonomorphization::ReturnType {
-                        span,
-                        name,
-                        in_elem,
-                        in_ty,
-                        ret_ty
-                    });
+                    require!(
+                        ret_ty == in_elem,
+                        InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty }
+                    );
                     args[0].immediate()
                 } else {
                     let bitwidth = match in_elem.kind() {
@@ -2218,25 +2211,27 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     if name == sym::simd_cast_ptr {
         let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
 
         match in_elem.kind() {
             ty::RawPtr(p_ty, _) => {
                 let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
                     bx.tcx.normalize_erasing_regions(bx.typing_env(), ty)
                 });
-                require!(metadata.is_unit(), InvalidMonomorphization::CastWidePointer {
-                    span,
-                    name,
-                    ty: in_elem
-                });
+                require!(
+                    metadata.is_unit(),
+                    InvalidMonomorphization::CastWidePointer { span, name, ty: in_elem }
+                );
             }
             _ => {
                 return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem })
@@ -2247,11 +2242,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                 let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| {
                     bx.tcx.normalize_erasing_regions(bx.typing_env(), ty)
                 });
-                require!(metadata.is_unit(), InvalidMonomorphization::CastWidePointer {
-                    span,
-                    name,
-                    ty: out_elem
-                });
+                require!(
+                    metadata.is_unit(),
+                    InvalidMonomorphization::CastWidePointer { span, name, ty: out_elem }
+                );
             }
             _ => {
                 return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem })
@@ -2263,14 +2257,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     if name == sym::simd_expose_provenance {
         let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
 
         match in_elem.kind() {
             ty::RawPtr(_, _) => {}
@@ -2288,14 +2285,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     if name == sym::simd_with_exposed_provenance {
         let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
 
         match in_elem.kind() {
             ty::Uint(ty::UintTy::Usize) => {}
@@ -2313,14 +2313,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
 
     if name == sym::simd_cast || name == sym::simd_as {
         let (out_len, out_elem) = require_simd!(ret_ty, SimdReturn);
-        require!(in_len == out_len, InvalidMonomorphization::ReturnLengthInputType {
-            span,
-            name,
-            in_len,
-            in_ty,
-            ret_ty,
-            out_len
-        });
+        require!(
+            in_len == out_len,
+            InvalidMonomorphization::ReturnLengthInputType {
+                span,
+                name,
+                in_len,
+                in_ty,
+                ret_ty,
+                out_len
+            }
+        );
         // casting cares about nominal type, not just structural type
         if in_elem == out_elem {
             return Ok(args[0].immediate());
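
Most of the intrinsic.rs churn above is purely formatting: `require!(cond, InvalidMonomorphization::…)` invocations are re-wrapped so the condition and the diagnostic struct each get their own line. For readers unfamiliar with the macro, here is a hypothetical sketch of the general shape of such a check macro; the real definition lives earlier in intrinsic.rs and differs in detail (it emits a compiler diagnostic rather than a `String`):

    // Hypothetical sketch of a `require!`-style guard: if the condition fails,
    // bail out of the enclosing function with the supplied diagnostic.
    macro_rules! require {
        ($cond:expr, $diag:expr) => {
            if !$cond {
                return Err($diag);
            }
        };
    }

    fn check_lengths(in_len: u64, out_len: u64) -> Result<(), String> {
        require!(
            in_len == out_len,
            format!("expected a return vector of {in_len} lanes, found {out_len}")
        );
        Ok(())
    }

    fn main() {
        assert!(check_lengths(4, 4).is_ok());
        assert!(check_lengths(4, 8).is_err());
    }
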
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 4a84fd29e44..425381b0ffa 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -12,14 +12,13 @@
 #![feature(exact_size_is_empty)]
 #![feature(extern_types)]
 #![feature(file_buffered)]
-#![feature(hash_raw_entry)]
+#![feature(if_let_guard)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(iter_intersperse)]
 #![feature(let_chains)]
 #![feature(rustdoc_internals)]
 #![feature(slice_as_array)]
 #![feature(try_blocks)]
-#![warn(unreachable_pub)]
 // tidy-alphabetical-end
 
 use std::any::Any;
@@ -28,8 +27,9 @@ use std::mem::ManuallyDrop;
 
 use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
+use context::SimpleCx;
 use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
-pub use llvm_util::target_features_cfg;
+use llvm_util::target_features_cfg;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
@@ -71,14 +71,7 @@ mod debuginfo;
 mod declare;
 mod errors;
 mod intrinsic;
-
-// The following is a workaround that replaces `pub mod llvm;` and that fixes issue 53912.
-#[path = "llvm/mod.rs"]
-mod llvm_;
-pub mod llvm {
-    pub use super::llvm_::*;
-}
-
+mod llvm;
 mod llvm_util;
 mod mono_item;
 mod type_;
@@ -120,9 +113,11 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
         kind: AllocatorKind,
         alloc_error_handler_kind: AllocatorKind,
     ) -> ModuleLlvm {
-        let mut module_llvm = ModuleLlvm::new_metadata(tcx, module_name);
+        let module_llvm = ModuleLlvm::new_metadata(tcx, module_name);
+        let cx =
+            SimpleCx::new(module_llvm.llmod(), &module_llvm.llcx, tcx.data_layout.pointer_size);
         unsafe {
-            allocator::codegen(tcx, &mut module_llvm, module_name, kind, alloc_error_handler_kind);
+            allocator::codegen(tcx, cx, module_name, kind, alloc_error_handler_kind);
         }
         module_llvm
     }
@@ -198,7 +193,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     unsafe fn optimize(
         cgcx: &CodegenContext<Self>,
         dcx: DiagCtxtHandle<'_>,
-        module: &ModuleCodegen<Self::Module>,
+        module: &mut ModuleCodegen<Self::Module>,
         config: &ModuleConfig,
     ) -> Result<(), FatalError> {
         unsafe { back::write::optimize(cgcx, dcx, module, config) }
@@ -249,9 +244,6 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     }
 }
 
-unsafe impl Send for LlvmCodegenBackend {} // Llvm is on a per-thread basis
-unsafe impl Sync for LlvmCodegenBackend {}
-
 impl LlvmCodegenBackend {
     pub fn new() -> Box<dyn CodegenBackend> {
         Box::new(LlvmCodegenBackend(()))
@@ -346,8 +338,8 @@ impl CodegenBackend for LlvmCodegenBackend {
         llvm_util::print_version();
     }
 
-    fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-        target_features_cfg(sess, allow_unstable)
+    fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
+        target_features_cfg(sess)
     }
 
     fn codegen_crate<'tcx>(
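
The `target_features_cfg` signature change above drops the `allow_unstable: bool` parameter in favour of a single call that returns two lists, which callers can then use as the stable set and the set that additionally admits unstable features. A rough sketch of that shape; the feature names and the split below are invented, and the real computation is in llvm_util.rs:

    // Illustrative only: returning both feature lists at once instead of
    // taking an `allow_unstable` flag. The names are placeholders.
    fn target_features_cfg_sketch() -> (Vec<&'static str>, Vec<&'static str>) {
        let stable = vec!["sse2", "sse4.2"];
        let mut with_unstable = stable.clone();
        with_unstable.push("hypothetical-unstable-feature");
        (stable, with_unstable)
    }

    fn main() {
        let (stable, with_unstable) = target_features_cfg_sketch();
        assert!(with_unstable.len() >= stable.len());
    }
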
diff --git a/compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs b/compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs
index 4dabde55e98..51bcc4d123d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/archive_ro.rs
@@ -5,17 +5,17 @@ use std::{slice, str};
 
 use rustc_fs_util::path_to_c_string;
 
-pub struct ArchiveRO {
+pub(crate) struct ArchiveRO {
     pub raw: &'static mut super::Archive,
 }
 
 unsafe impl Send for ArchiveRO {}
 
-pub struct Iter<'a> {
+pub(crate) struct Iter<'a> {
     raw: &'a mut super::ArchiveIterator<'a>,
 }
 
-pub struct Child<'a> {
+pub(crate) struct Child<'a> {
     pub raw: &'a mut super::ArchiveChild<'a>,
 }
 
@@ -26,7 +26,7 @@ impl ArchiveRO {
     ///
     /// If this archive is used with a mutable method, then an error will be
     /// raised.
-    pub fn open(dst: &Path) -> Result<ArchiveRO, String> {
+    pub(crate) fn open(dst: &Path) -> Result<ArchiveRO, String> {
         unsafe {
             let s = path_to_c_string(dst);
             let ar = super::LLVMRustOpenArchive(s.as_ptr()).ok_or_else(|| {
@@ -36,7 +36,7 @@ impl ArchiveRO {
         }
     }
 
-    pub fn iter(&self) -> Iter<'_> {
+    pub(crate) fn iter(&self) -> Iter<'_> {
         unsafe { Iter { raw: super::LLVMRustArchiveIteratorNew(self.raw) } }
     }
 }
@@ -71,7 +71,7 @@ impl<'a> Drop for Iter<'a> {
 }
 
 impl<'a> Child<'a> {
-    pub fn name(&self) -> Option<&'a str> {
+    pub(crate) fn name(&self) -> Option<&'a str> {
         unsafe {
             let mut name_len = 0;
             let name_ptr = super::LLVMRustArchiveChildName(self.raw, &mut name_len);
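
The archive_ro.rs hunks above are visibility tightenings only: with `pub mod llvm` gone from lib.rs, these items become `pub(crate)`, visible anywhere in rustc_codegen_llvm but no longer part of any external surface. For reference, the distinction in a self-contained form:

    // `pub(crate)` items can be used from any module in the same crate, but
    // do not appear in the crate's public API.
    mod archive {
        pub(crate) struct ArchiveRO {
            pub(crate) path: String,
        }

        pub(crate) fn open(path: &str) -> ArchiveRO {
            ArchiveRO { path: path.to_string() }
        }
    }

    fn main() {
        let ar = archive::open("libfoo.a"); // fine: same crate
        println!("{}", ar.path);
    }
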
diff --git a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
index 11043b664f5..0e0f2b0eab0 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/diagnostic.rs
@@ -3,13 +3,13 @@
 use libc::c_uint;
 use rustc_span::InnerSpan;
 
-pub use self::Diagnostic::*;
-pub use self::OptimizationDiagnosticKind::*;
+pub(crate) use self::Diagnostic::*;
+use self::OptimizationDiagnosticKind::*;
 use super::{DiagnosticInfo, SMDiagnostic};
 use crate::value::Value;
 
 #[derive(Copy, Clone, Debug)]
-pub enum OptimizationDiagnosticKind {
+pub(crate) enum OptimizationDiagnosticKind {
     OptimizationRemark,
     OptimizationMissed,
     OptimizationAnalysis,
@@ -19,9 +19,10 @@ pub enum OptimizationDiagnosticKind {
     OptimizationRemarkOther,
 }
 
-pub struct OptimizationDiagnostic<'ll> {
+pub(crate) struct OptimizationDiagnostic<'ll> {
     pub kind: OptimizationDiagnosticKind,
     pub pass_name: String,
+    #[expect(dead_code)]
     pub function: &'ll Value,
     pub line: c_uint,
     pub column: c_uint,
@@ -73,14 +74,14 @@ impl<'ll> OptimizationDiagnostic<'ll> {
     }
 }
 
-pub struct SrcMgrDiagnostic {
+pub(crate) struct SrcMgrDiagnostic {
     pub level: super::DiagnosticLevel,
     pub message: String,
     pub source: Option<(String, Vec<InnerSpan>)>,
 }
 
 impl SrcMgrDiagnostic {
-    pub unsafe fn unpack(diag: &SMDiagnostic) -> SrcMgrDiagnostic {
+    pub(crate) unsafe fn unpack(diag: &SMDiagnostic) -> SrcMgrDiagnostic {
         // Recover the post-substitution assembly code from LLVM for better
         // diagnostics.
         let mut have_source = false;
@@ -120,7 +121,7 @@ impl SrcMgrDiagnostic {
 }
 
 #[derive(Clone)]
-pub struct InlineAsmDiagnostic {
+pub(crate) struct InlineAsmDiagnostic {
     pub level: super::DiagnosticLevel,
     pub cookie: u64,
     pub message: String,
@@ -158,7 +159,7 @@ impl InlineAsmDiagnostic {
     }
 }
 
-pub enum Diagnostic<'ll> {
+pub(crate) enum Diagnostic<'ll> {
     Optimization(OptimizationDiagnostic<'ll>),
     InlineAsm(InlineAsmDiagnostic),
     PGO(&'ll DiagnosticInfo),
@@ -166,11 +167,12 @@ pub enum Diagnostic<'ll> {
     Unsupported(&'ll DiagnosticInfo),
 
     /// LLVM has other types that we do not wrap here.
+    #[expect(dead_code)]
     UnknownDiagnostic(&'ll DiagnosticInfo),
 }
 
 impl<'ll> Diagnostic<'ll> {
-    pub unsafe fn unpack(di: &'ll DiagnosticInfo) -> Self {
+    pub(crate) unsafe fn unpack(di: &'ll DiagnosticInfo) -> Self {
         use super::DiagnosticKind as Dk;
 
         unsafe {
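
The `#[expect(dead_code)]` attributes added in this file behave like `#[allow(dead_code)]`, with one extra property: if the expected lint never fires (say, the field later gains a use), the compiler reports `unfulfilled_lint_expectations`, so the suppression cannot silently go stale. A standalone example:

    // `#[expect(dead_code)]` silences the lint like `#[allow]`, but also
    // warns via `unfulfilled_lint_expectations` once the field is actually
    // used and the expectation no longer holds.
    struct Diag {
        message: String,
        #[expect(dead_code)]
        cookie: u64, // carried along but never read in this sketch
    }

    fn main() {
        let d = Diag { message: "remark".to_string(), cookie: 0 };
        println!("{}", d.message);
    }
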
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index 92b0ce8ffe1..a9b3bdf7344 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -1,36 +1,134 @@
 #![allow(non_camel_case_types)]
+#![expect(dead_code)]
 
 use libc::{c_char, c_uint};
 
-use super::ffi::{BasicBlock, Metadata, Module, Type, Value};
+use super::MetadataKindId;
+use super::ffi::{AttributeKind, BasicBlock, Metadata, Module, Type, Value};
 use crate::llvm::Bool;
 
 #[link(name = "llvm-wrapper", kind = "static")]
-extern "C" {
+unsafe extern "C" {
     // Enzyme
-    pub fn LLVMRustHasMetadata(I: &Value, KindID: c_uint) -> bool;
-    pub fn LLVMRustEraseInstUntilInclusive(BB: &BasicBlock, I: &Value);
-    pub fn LLVMRustGetLastInstruction<'a>(BB: &BasicBlock) -> Option<&'a Value>;
-    pub fn LLVMRustDIGetInstMetadata(I: &Value) -> Option<&Metadata>;
-    pub fn LLVMRustEraseInstFromParent(V: &Value);
-    pub fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value;
-    pub fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
+    pub(crate) safe fn LLVMRustHasMetadata(I: &Value, KindID: MetadataKindId) -> bool;
+    pub(crate) fn LLVMRustEraseInstUntilInclusive(BB: &BasicBlock, I: &Value);
+    pub(crate) fn LLVMRustGetLastInstruction<'a>(BB: &BasicBlock) -> Option<&'a Value>;
+    pub(crate) fn LLVMRustDIGetInstMetadata(I: &Value) -> Option<&Metadata>;
+    pub(crate) fn LLVMRustEraseInstFromParent(V: &Value);
+    pub(crate) fn LLVMRustGetTerminator<'a>(B: &BasicBlock) -> &'a Value;
+    pub(crate) fn LLVMRustVerifyFunction(V: &Value, action: LLVMRustVerifierFailureAction) -> Bool;
+    pub(crate) fn LLVMRustHasAttributeAtIndex(V: &Value, i: c_uint, Kind: AttributeKind) -> bool;
+    pub(crate) fn LLVMRustGetArrayNumElements(Ty: &Type) -> u64;
 }
 
-extern "C" {
+unsafe extern "C" {
     // Enzyme
-    pub fn LLVMDumpModule(M: &Module);
-    pub fn LLVMDumpValue(V: &Value);
-    pub fn LLVMGetFunctionCallConv(F: &Value) -> c_uint;
-    pub fn LLVMGetReturnType(T: &Type) -> &Type;
-    pub fn LLVMGetParams(Fnc: &Value, parms: *mut &Value);
-    pub fn LLVMGetNamedFunction(M: &Module, Name: *const c_char) -> Option<&Value>;
+    pub(crate) fn LLVMDumpModule(M: &Module);
+    pub(crate) fn LLVMDumpValue(V: &Value);
+    pub(crate) fn LLVMGetFunctionCallConv(F: &Value) -> c_uint;
+    pub(crate) fn LLVMGetReturnType(T: &Type) -> &Type;
+    pub(crate) fn LLVMGetParams(Fnc: &Value, parms: *mut &Value);
+    pub(crate) fn LLVMGetNamedFunction(M: &Module, Name: *const c_char) -> Option<&Value>;
 }
 
 #[repr(C)]
 #[derive(Copy, Clone, PartialEq)]
-pub enum LLVMRustVerifierFailureAction {
+pub(crate) enum LLVMRustVerifierFailureAction {
     LLVMAbortProcessAction = 0,
     LLVMPrintMessageAction = 1,
     LLVMReturnStatusAction = 2,
 }
+
+#[cfg(llvm_enzyme)]
+pub(crate) use self::Enzyme_AD::*;
+
+#[cfg(llvm_enzyme)]
+pub(crate) mod Enzyme_AD {
+    use libc::c_void;
+    unsafe extern "C" {
+        pub(crate) fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8);
+    }
+    unsafe extern "C" {
+        static mut EnzymePrintPerf: c_void;
+        static mut EnzymePrintActivity: c_void;
+        static mut EnzymePrintType: c_void;
+        static mut EnzymePrint: c_void;
+        static mut EnzymeStrictAliasing: c_void;
+        static mut looseTypeAnalysis: c_void;
+        static mut EnzymeInline: c_void;
+        static mut RustTypeRules: c_void;
+    }
+    pub(crate) fn set_print_perf(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintPerf), print as u8);
+        }
+    }
+    pub(crate) fn set_print_activity(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintActivity), print as u8);
+        }
+    }
+    pub(crate) fn set_print_type(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintType), print as u8);
+        }
+    }
+    pub(crate) fn set_print(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrint), print as u8);
+        }
+    }
+    pub(crate) fn set_strict_aliasing(strict: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymeStrictAliasing), strict as u8);
+        }
+    }
+    pub(crate) fn set_loose_types(loose: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(looseTypeAnalysis), loose as u8);
+        }
+    }
+    pub(crate) fn set_inline(val: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymeInline), val as u8);
+        }
+    }
+    pub(crate) fn set_rust_rules(val: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(RustTypeRules), val as u8);
+        }
+    }
+}
+
+#[cfg(not(llvm_enzyme))]
+pub(crate) use self::Fallback_AD::*;
+
+#[cfg(not(llvm_enzyme))]
+pub(crate) mod Fallback_AD {
+    #![allow(unused_variables)]
+
+    pub(crate) fn set_inline(val: bool) {
+        unimplemented!()
+    }
+    pub(crate) fn set_print_perf(print: bool) {
+        unimplemented!()
+    }
+    pub(crate) fn set_print_activity(print: bool) {
+        unimplemented!()
+    }
+    pub(crate) fn set_print_type(print: bool) {
+        unimplemented!()
+    }
+    pub(crate) fn set_print(print: bool) {
+        unimplemented!()
+    }
+    pub(crate) fn set_strict_aliasing(strict: bool) {
+        unimplemented!()
+    }
+    pub(crate) fn set_loose_types(loose: bool) {
+        unimplemented!()
+    }
+    pub(crate) fn set_rust_rules(val: bool) {
+        unimplemented!()
+    }
+}
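
The move from `extern "C"` to `unsafe extern "C"` (and marking `LLVMRustHasMetadata` as `safe fn`) uses the newer extern-block form: the block as a whole is declared `unsafe`, and individual items may be declared `safe` so they can be called without an `unsafe` block. A minimal standalone sketch against libc rather than the LLVM/Enzyme wrappers:

    // Newer-style extern block: the block itself is `unsafe extern`, and
    // items that are sound to call from safe code can be declared `safe`.
    unsafe extern "C" {
        // libc `abs` is fine to call with any i32, so it is declared `safe`.
        safe fn abs(x: i32) -> i32;
        // libc `strlen` takes a raw pointer, so calls still need `unsafe`.
        fn strlen(s: *const core::ffi::c_char) -> usize;
    }

    fn main() {
        assert_eq!(abs(-5), 5); // no `unsafe` needed for the `safe fn`
        let s = c"hello";
        assert_eq!(unsafe { strlen(s.as_ptr()) }, 5);
    }
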
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 0d04f770bc6..9ff04f72903 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -32,10 +32,10 @@ use crate::llvm;
 
 /// In the LLVM-C API, boolean values are passed as `typedef int LLVMBool`,
 /// which has a different ABI from Rust or C++ `bool`.
-pub type Bool = c_int;
+pub(crate) type Bool = c_int;
 
-pub const True: Bool = 1 as Bool;
-pub const False: Bool = 0 as Bool;
+pub(crate) const True: Bool = 1 as Bool;
+pub(crate) const False: Bool = 0 as Bool;
 
 /// Wrapper for a raw enum value returned from LLVM's C APIs.
 ///
@@ -44,7 +44,7 @@ pub const False: Bool = 0 as Bool;
 /// value and returns it. Instead, return this raw wrapper, then convert to the
 /// Rust-side enum explicitly.
 #[repr(transparent)]
-pub struct RawEnum<T> {
+pub(crate) struct RawEnum<T> {
     value: u32,
     /// We don't own or consume a `T`, but we can produce one.
     _rust_side_type: PhantomData<fn() -> T>,
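
`RawEnum<T>` exists because LLVM can return enum values outside the range declared by the Rust-side `#[repr(C)]` enum, and transmuting such a value would be undefined behaviour; the wrapper keeps the raw `u32` and converts explicitly via the `TryFrom<u32>` bound. A sketch of that pattern — the conversion method name and the example enum values below are invented; the real impl follows in this file:

    use std::marker::PhantomData;

    // Sketch of the RawEnum pattern: hold the FFI value as a plain u32 and
    // convert to the Rust enum explicitly, rejecting unknown discriminants
    // instead of transmuting. `to_rust` is a hypothetical method name.
    #[repr(transparent)]
    struct RawEnum<T> {
        value: u32,
        _rust_side_type: PhantomData<fn() -> T>,
    }

    impl<T: TryFrom<u32>> RawEnum<T> {
        fn to_rust(&self) -> Option<T> {
            T::try_from(self.value).ok()
        }
    }

    #[derive(Debug, PartialEq)]
    enum DemoKind {
        A,
        B,
    }

    impl TryFrom<u32> for DemoKind {
        type Error = ();
        fn try_from(v: u32) -> Result<Self, ()> {
            match v {
                0 => Ok(DemoKind::A),
                1 => Ok(DemoKind::B),
                _ => Err(()),
            }
        }
    }

    fn main() {
        let ok = RawEnum::<DemoKind> { value: 1, _rust_side_type: PhantomData };
        assert_eq!(ok.to_rust(), Some(DemoKind::B));
        let bad = RawEnum::<DemoKind> { value: 999, _rust_side_type: PhantomData };
        assert_eq!(bad.to_rust(), None);
    }
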
@@ -64,24 +64,11 @@ impl<T: TryFrom<u32>> RawEnum<T> {
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
 #[allow(dead_code)] // Variants constructed by C++.
-pub enum LLVMRustResult {
+pub(crate) enum LLVMRustResult {
     Success,
     Failure,
 }
 
-/// Translation of LLVM's MachineTypes enum, defined in llvm\include\llvm\BinaryFormat\COFF.h.
-///
-/// We include only architectures supported on Windows.
-#[derive(Copy, Clone, PartialEq)]
-#[repr(C)]
-pub enum LLVMMachineType {
-    AMD64 = 0x8664,
-    I386 = 0x14c,
-    ARM64 = 0xaa64,
-    ARM64EC = 0xa641,
-    ARM = 0x01c0,
-}
-
 /// Must match the layout of `LLVMRustModuleFlagMergeBehavior`.
 ///
 /// When merging modules (e.g. during LTO), their metadata flags are combined. Conflicts are
@@ -96,7 +83,7 @@ pub enum LLVMMachineType {
 /// C++ API.
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum ModuleFlagMergeBehavior {
+pub(crate) enum ModuleFlagMergeBehavior {
     Error = 1,
     Warning = 2,
     Require = 3,
@@ -114,7 +101,7 @@ pub enum ModuleFlagMergeBehavior {
 /// See <https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/IR/CallingConv.h>
 #[derive(Copy, Clone, PartialEq, Debug, TryFromU32)]
 #[repr(C)]
-pub enum CallConv {
+pub(crate) enum CallConv {
     CCallConv = 0,
     FastCallConv = 8,
     ColdCallConv = 9,
@@ -139,7 +126,7 @@ pub enum CallConv {
 /// Must match the layout of `LLVMLinkage`.
 #[derive(Copy, Clone, PartialEq, TryFromU32)]
 #[repr(C)]
-pub enum Linkage {
+pub(crate) enum Linkage {
     ExternalLinkage = 0,
     AvailableExternallyLinkage = 1,
     LinkOnceAnyLinkage = 2,
@@ -166,14 +153,14 @@ pub enum Linkage {
 /// Must match the layout of `LLVMVisibility`.
 #[repr(C)]
 #[derive(Copy, Clone, PartialEq, TryFromU32)]
-pub enum Visibility {
+pub(crate) enum Visibility {
     Default = 0,
     Hidden = 1,
     Protected = 2,
 }
 
 impl Visibility {
-    pub fn from_generic(visibility: SymbolVisibility) -> Self {
+    pub(crate) fn from_generic(visibility: SymbolVisibility) -> Self {
         match visibility {
             SymbolVisibility::Hidden => Visibility::Hidden,
             SymbolVisibility::Protected => Visibility::Protected,
@@ -184,8 +171,9 @@ impl Visibility {
 
 /// LLVMUnnamedAddr
 #[repr(C)]
-pub enum UnnamedAddr {
+pub(crate) enum UnnamedAddr {
     No,
+    #[expect(dead_code)]
     Local,
     Global,
 }
@@ -193,7 +181,7 @@ pub enum UnnamedAddr {
 /// LLVMDLLStorageClass
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum DLLStorageClass {
+pub(crate) enum DLLStorageClass {
     #[allow(dead_code)]
     Default = 0,
     DllImport = 1, // Function to be imported from DLL.
@@ -206,7 +194,8 @@ pub enum DLLStorageClass {
 /// though it is not ABI compatible (since it's a C++ enum)
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
-pub enum AttributeKind {
+#[expect(dead_code, reason = "Some variants are unused, but are kept to match the C++")]
+pub(crate) enum AttributeKind {
     AlwaysInline = 0,
     ByVal = 1,
     Cold = 2,
@@ -254,7 +243,7 @@ pub enum AttributeKind {
 /// LLVMIntPredicate
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum IntPredicate {
+pub(crate) enum IntPredicate {
     IntEQ = 32,
     IntNE = 33,
     IntUGT = 34,
@@ -268,7 +257,7 @@ pub enum IntPredicate {
 }
 
 impl IntPredicate {
-    pub fn from_generic(intpre: rustc_codegen_ssa::common::IntPredicate) -> Self {
+    pub(crate) fn from_generic(intpre: rustc_codegen_ssa::common::IntPredicate) -> Self {
         use rustc_codegen_ssa::common::IntPredicate as Common;
         match intpre {
             Common::IntEQ => Self::IntEQ,
@@ -288,7 +277,7 @@ impl IntPredicate {
 /// LLVMRealPredicate
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum RealPredicate {
+pub(crate) enum RealPredicate {
     RealPredicateFalse = 0,
     RealOEQ = 1,
     RealOGT = 2,
@@ -308,7 +297,7 @@ pub enum RealPredicate {
 }
 
 impl RealPredicate {
-    pub fn from_generic(realp: rustc_codegen_ssa::common::RealPredicate) -> Self {
+    pub(crate) fn from_generic(realp: rustc_codegen_ssa::common::RealPredicate) -> Self {
         use rustc_codegen_ssa::common::RealPredicate as Common;
         match realp {
             Common::RealPredicateFalse => Self::RealPredicateFalse,
@@ -334,7 +323,8 @@ impl RealPredicate {
 /// LLVMTypeKind
 #[derive(Copy, Clone, PartialEq, Debug)]
 #[repr(C)]
-pub enum TypeKind {
+#[expect(dead_code, reason = "Some variants are unused, but are kept to match LLVM-C")]
+pub(crate) enum TypeKind {
     Void = 0,
     Half = 1,
     Float = 2,
@@ -357,7 +347,7 @@ pub enum TypeKind {
 }
 
 impl TypeKind {
-    pub fn to_generic(self) -> rustc_codegen_ssa::common::TypeKind {
+    pub(crate) fn to_generic(self) -> rustc_codegen_ssa::common::TypeKind {
         use rustc_codegen_ssa::common::TypeKind as Common;
         match self {
             Self::Void => Common::Void,
@@ -386,7 +376,7 @@ impl TypeKind {
 /// LLVMAtomicRmwBinOp
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum AtomicRmwBinOp {
+pub(crate) enum AtomicRmwBinOp {
     AtomicXchg = 0,
     AtomicAdd = 1,
     AtomicSub = 2,
@@ -401,7 +391,7 @@ pub enum AtomicRmwBinOp {
 }
 
 impl AtomicRmwBinOp {
-    pub fn from_generic(op: rustc_codegen_ssa::common::AtomicRmwBinOp) -> Self {
+    pub(crate) fn from_generic(op: rustc_codegen_ssa::common::AtomicRmwBinOp) -> Self {
         use rustc_codegen_ssa::common::AtomicRmwBinOp as Common;
         match op {
             Common::AtomicXchg => Self::AtomicXchg,
@@ -422,7 +412,7 @@ impl AtomicRmwBinOp {
 /// LLVMAtomicOrdering
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum AtomicOrdering {
+pub(crate) enum AtomicOrdering {
     #[allow(dead_code)]
     NotAtomic = 0,
     Unordered = 1,
@@ -435,7 +425,7 @@ pub enum AtomicOrdering {
 }
 
 impl AtomicOrdering {
-    pub fn from_generic(ao: rustc_codegen_ssa::common::AtomicOrdering) -> Self {
+    pub(crate) fn from_generic(ao: rustc_codegen_ssa::common::AtomicOrdering) -> Self {
         use rustc_codegen_ssa::common::AtomicOrdering as Common;
         match ao {
             Common::Unordered => Self::Unordered,
@@ -451,7 +441,7 @@ impl AtomicOrdering {
 /// LLVMRustFileType
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum FileType {
+pub(crate) enum FileType {
     AssemblyFile,
     ObjectFile,
 }
@@ -459,7 +449,8 @@ pub enum FileType {
 /// LLVMMetadataType
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum MetadataType {
+#[expect(dead_code, reason = "Some variants are unused, but are kept to match LLVM-C")]
+pub(crate) enum MetadataType {
     MD_dbg = 0,
     MD_tbaa = 1,
     MD_prof = 2,
@@ -483,7 +474,7 @@ pub enum MetadataType {
 /// LLVMRustAsmDialect
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum AsmDialect {
+pub(crate) enum AsmDialect {
     Att,
     Intel,
 }
@@ -491,7 +482,7 @@ pub enum AsmDialect {
 /// LLVMRustCodeGenOptLevel
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum CodeGenOptLevel {
+pub(crate) enum CodeGenOptLevel {
     None,
     Less,
     Default,
@@ -500,7 +491,7 @@ pub enum CodeGenOptLevel {
 
 /// LLVMRustPassBuilderOptLevel
 #[repr(C)]
-pub enum PassBuilderOptLevel {
+pub(crate) enum PassBuilderOptLevel {
     O0,
     O1,
     O2,
@@ -512,7 +503,7 @@ pub enum PassBuilderOptLevel {
 /// LLVMRustOptStage
 #[derive(PartialEq)]
 #[repr(C)]
-pub enum OptStage {
+pub(crate) enum OptStage {
     PreLinkNoLTO,
     PreLinkThinLTO,
     PreLinkFatLTO,
@@ -522,7 +513,7 @@ pub enum OptStage {
 
 /// LLVMRustSanitizerOptions
 #[repr(C)]
-pub struct SanitizerOptions {
+pub(crate) struct SanitizerOptions {
     pub sanitize_address: bool,
     pub sanitize_address_recover: bool,
     pub sanitize_cfi: bool,
@@ -543,7 +534,7 @@ pub struct SanitizerOptions {
 /// LLVMRustRelocModel
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum RelocModel {
+pub(crate) enum RelocModel {
     Static,
     PIC,
     DynamicNoPic,
@@ -555,7 +546,7 @@ pub enum RelocModel {
 /// LLVMRustFloatABI
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum FloatAbi {
+pub(crate) enum FloatAbi {
     Default,
     Soft,
     Hard,
@@ -564,7 +555,7 @@ pub enum FloatAbi {
 /// LLVMRustCodeModel
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum CodeModel {
+pub(crate) enum CodeModel {
     Tiny,
     Small,
     Kernel,
@@ -577,7 +568,7 @@ pub enum CodeModel {
 #[derive(Copy, Clone)]
 #[repr(C)]
 #[allow(dead_code)] // Variants constructed by C++.
-pub enum DiagnosticKind {
+pub(crate) enum DiagnosticKind {
     Other,
     InlineAsm,
     StackSize,
@@ -600,7 +591,7 @@ pub enum DiagnosticKind {
 #[derive(Copy, Clone)]
 #[repr(C)]
 #[allow(dead_code)] // Variants constructed by C++.
-pub enum DiagnosticLevel {
+pub(crate) enum DiagnosticLevel {
     Error,
     Warning,
     Note,
@@ -610,7 +601,7 @@ pub enum DiagnosticLevel {
 /// LLVMRustArchiveKind
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum ArchiveKind {
+pub(crate) enum ArchiveKind {
     K_GNU,
     K_BSD,
     K_DARWIN,
@@ -620,15 +611,15 @@ pub enum ArchiveKind {
 
 unsafe extern "C" {
     // LLVMRustThinLTOData
-    pub type ThinLTOData;
+    pub(crate) type ThinLTOData;
 
     // LLVMRustThinLTOBuffer
-    pub type ThinLTOBuffer;
+    pub(crate) type ThinLTOBuffer;
 }
 
 /// LLVMRustThinLTOModule
 #[repr(C)]
-pub struct ThinLTOModule {
+pub(crate) struct ThinLTOModule {
     pub identifier: *const c_char,
     pub data: *const u8,
     pub len: usize,
@@ -637,7 +628,8 @@ pub struct ThinLTOModule {
 /// LLVMThreadLocalMode
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum ThreadLocalMode {
+pub(crate) enum ThreadLocalMode {
+    #[expect(dead_code)]
     NotThreadLocal,
     GeneralDynamic,
     LocalDynamic,
@@ -645,20 +637,10 @@ pub enum ThreadLocalMode {
     LocalExec,
 }
 
-/// LLVMRustTailCallKind
-#[derive(Copy, Clone)]
-#[repr(C)]
-pub enum TailCallKind {
-    None,
-    Tail,
-    MustTail,
-    NoTail,
-}
-
 /// LLVMRustChecksumKind
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum ChecksumKind {
+pub(crate) enum ChecksumKind {
     None,
     MD5,
     SHA1,
@@ -668,7 +650,7 @@ pub enum ChecksumKind {
 /// LLVMRustMemoryEffects
 #[derive(Copy, Clone)]
 #[repr(C)]
-pub enum MemoryEffects {
+pub(crate) enum MemoryEffects {
     None,
     ReadOnly,
     InaccessibleMemOnly,
@@ -677,7 +659,8 @@ pub enum MemoryEffects {
 /// LLVMOpcode
 #[derive(Copy, Clone, PartialEq, Eq)]
 #[repr(C)]
-pub enum Opcode {
+#[expect(dead_code, reason = "Some variants are unused, but are kept to match LLVM-C")]
+pub(crate) enum Opcode {
     Ret = 1,
     Br = 2,
     Switch = 3,
@@ -758,50 +741,48 @@ struct InvariantOpaque<'a> {
 
 // Opaque pointer types
 unsafe extern "C" {
-    pub type Module;
-    pub type Context;
-    pub type Type;
-    pub type Value;
-    pub type ConstantInt;
-    pub type Attribute;
-    pub type Metadata;
-    pub type BasicBlock;
-    pub type Comdat;
+    pub(crate) type Module;
+    pub(crate) type Context;
+    pub(crate) type Type;
+    pub(crate) type Value;
+    pub(crate) type ConstantInt;
+    pub(crate) type Attribute;
+    pub(crate) type Metadata;
+    pub(crate) type BasicBlock;
+    pub(crate) type Comdat;
 }
 #[repr(C)]
-pub struct Builder<'a>(InvariantOpaque<'a>);
+pub(crate) struct Builder<'a>(InvariantOpaque<'a>);
 #[repr(C)]
-pub struct PassManager<'a>(InvariantOpaque<'a>);
+pub(crate) struct PassManager<'a>(InvariantOpaque<'a>);
 unsafe extern "C" {
-    pub type Pass;
     pub type TargetMachine;
-    pub type Archive;
+    pub(crate) type Archive;
 }
 #[repr(C)]
-pub struct ArchiveIterator<'a>(InvariantOpaque<'a>);
+pub(crate) struct ArchiveIterator<'a>(InvariantOpaque<'a>);
 #[repr(C)]
-pub struct ArchiveChild<'a>(InvariantOpaque<'a>);
+pub(crate) struct ArchiveChild<'a>(InvariantOpaque<'a>);
 unsafe extern "C" {
-    pub type Twine;
-    pub type DiagnosticInfo;
-    pub type SMDiagnostic;
+    pub(crate) type Twine;
+    pub(crate) type DiagnosticInfo;
+    pub(crate) type SMDiagnostic;
 }
 #[repr(C)]
-pub struct RustArchiveMember<'a>(InvariantOpaque<'a>);
+pub(crate) struct RustArchiveMember<'a>(InvariantOpaque<'a>);
 /// Opaque pointee of `LLVMOperandBundleRef`.
 #[repr(C)]
 pub(crate) struct OperandBundle<'a>(InvariantOpaque<'a>);
 #[repr(C)]
-pub struct Linker<'a>(InvariantOpaque<'a>);
+pub(crate) struct Linker<'a>(InvariantOpaque<'a>);
 
 unsafe extern "C" {
-    pub type DiagnosticHandler;
+    pub(crate) type DiagnosticHandler;
 }
 
-pub type DiagnosticHandlerTy = unsafe extern "C" fn(&DiagnosticInfo, *mut c_void);
-pub type InlineAsmDiagHandlerTy = unsafe extern "C" fn(&SMDiagnostic, *const c_void, c_uint);
+pub(crate) type DiagnosticHandlerTy = unsafe extern "C" fn(&DiagnosticInfo, *mut c_void);
 
-pub mod debuginfo {
+pub(crate) mod debuginfo {
     use std::ptr;
 
     use bitflags::bitflags;
@@ -818,7 +799,7 @@ pub mod debuginfo {
     /// builder reference typically has a shorter lifetime than the LLVM
     /// session (`'ll`) that it participates in.
     #[repr(C)]
-    pub struct DIBuilder<'ll>(InvariantOpaque<'ll>);
+    pub(crate) struct DIBuilder<'ll>(InvariantOpaque<'ll>);
 
     /// Owning pointer to a `DIBuilder<'ll>` that will dispose of the builder
     /// when dropped. Use `.as_ref()` to get the underlying `&DIBuilder`
@@ -847,23 +828,22 @@ pub mod debuginfo {
         }
     }
 
-    pub type DIDescriptor = Metadata;
-    pub type DILocation = Metadata;
-    pub type DIScope = DIDescriptor;
-    pub type DIFile = DIScope;
-    pub type DILexicalBlock = DIScope;
-    pub type DISubprogram = DIScope;
-    pub type DINameSpace = DIScope;
-    pub type DIType = DIDescriptor;
-    pub type DIBasicType = DIType;
-    pub type DIDerivedType = DIType;
-    pub type DICompositeType = DIDerivedType;
-    pub type DIVariable = DIDescriptor;
-    pub type DIGlobalVariableExpression = DIDescriptor;
-    pub type DIArray = DIDescriptor;
-    pub type DISubrange = DIDescriptor;
-    pub type DIEnumerator = DIDescriptor;
-    pub type DITemplateTypeParameter = DIDescriptor;
+    pub(crate) type DIDescriptor = Metadata;
+    pub(crate) type DILocation = Metadata;
+    pub(crate) type DIScope = DIDescriptor;
+    pub(crate) type DIFile = DIScope;
+    pub(crate) type DILexicalBlock = DIScope;
+    pub(crate) type DISubprogram = DIScope;
+    pub(crate) type DIType = DIDescriptor;
+    pub(crate) type DIBasicType = DIType;
+    pub(crate) type DIDerivedType = DIType;
+    pub(crate) type DICompositeType = DIDerivedType;
+    pub(crate) type DIVariable = DIDescriptor;
+    pub(crate) type DIGlobalVariableExpression = DIDescriptor;
+    pub(crate) type DIArray = DIDescriptor;
+    pub(crate) type DISubrange = DIDescriptor;
+    pub(crate) type DIEnumerator = DIDescriptor;
+    pub(crate) type DITemplateTypeParameter = DIDescriptor;
 
     bitflags! {
         /// Must match the layout of `LLVMDIFlags` in the LLVM-C API.
@@ -872,7 +852,7 @@ pub mod debuginfo {
         /// assertions in `RustWrapper.cpp` used by `fromRust(LLVMDIFlags)`.
         #[repr(transparent)]
         #[derive(Clone, Copy, Default)]
-        pub struct DIFlags: u32 {
+        pub(crate) struct DIFlags: u32 {
             const FlagZero                = 0;
             const FlagPrivate             = 1;
             const FlagProtected           = 2;
@@ -912,7 +892,7 @@ pub mod debuginfo {
     bitflags! {
         #[repr(transparent)]
         #[derive(Clone, Copy, Default)]
-        pub struct DISPFlags: u32 {
+        pub(crate) struct DISPFlags: u32 {
             const SPFlagZero              = 0;
             const SPFlagVirtual           = 1;
             const SPFlagPureVirtual       = 2;
@@ -926,7 +906,7 @@ pub mod debuginfo {
     /// LLVMRustDebugEmissionKind
     #[derive(Copy, Clone)]
     #[repr(C)]
-    pub enum DebugEmissionKind {
+    pub(crate) enum DebugEmissionKind {
         NoDebug,
         FullDebug,
         LineTablesOnly,
@@ -934,7 +914,7 @@ pub mod debuginfo {
     }
 
     impl DebugEmissionKind {
-        pub fn from_generic(kind: rustc_session::config::DebugInfo) -> Self {
+        pub(crate) fn from_generic(kind: rustc_session::config::DebugInfo) -> Self {
             // We should be setting LLVM's emission kind to `LineTablesOnly` if
             // we are compiling with "limited" debuginfo. However, some of the
             // existing tools relied on slightly more debuginfo being generated than
@@ -958,8 +938,9 @@ pub mod debuginfo {
     /// LLVMRustDebugNameTableKind
     #[derive(Clone, Copy)]
     #[repr(C)]
-    pub enum DebugNameTableKind {
+    pub(crate) enum DebugNameTableKind {
         Default,
+        #[expect(dead_code)]
         Gnu,
         None,
     }
@@ -969,7 +950,7 @@ pub mod debuginfo {
 bitflags! {
     #[repr(transparent)]
     #[derive(Default)]
-    pub struct AllocKindFlags : u64 {
+    pub(crate) struct AllocKindFlags : u64 {
         const Unknown = 0;
         const Alloc = 1;
         const Realloc = 1 << 1;
@@ -980,62 +961,90 @@ bitflags! {
     }
 }
 
+// These values **must** match with LLVMGEPNoWrapFlags
+bitflags! {
+    #[repr(transparent)]
+    #[derive(Default)]
+    pub struct GEPNoWrapFlags : c_uint {
+        const InBounds = 1 << 0;
+        const NUSW = 1 << 1;
+        const NUW = 1 << 2;
+    }
+}
+
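Note: `GEPNoWrapFlags` is an ordinary bitflags type, so callers combine flags with `|` and hand the result to `LLVMBuildGEPWithNoWrapFlags` (declared further down in this diff). A minimal sketch, assuming `builder`, `ty`, `ptr` and `indices` are valid handles obtained elsewhere in the codegen context:

    // Sketch only: `builder`, `ty`, `ptr` and `indices` are assumptions, not values from this file.
    let flags = GEPNoWrapFlags::InBounds | GEPNoWrapFlags::NUW;
    let gep = unsafe {
        LLVMBuildGEPWithNoWrapFlags(
            builder,
            ty,
            ptr,
            indices.as_ptr(),
            indices.len() as c_uint,
            c"".as_ptr(), // anonymous result name
            flags,
        )
    };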
 unsafe extern "C" {
-    pub type ModuleBuffer;
+    pub(crate) type ModuleBuffer;
 }
 
-pub type SelfProfileBeforePassCallback =
+pub(crate) type SelfProfileBeforePassCallback =
     unsafe extern "C" fn(*mut c_void, *const c_char, *const c_char);
-pub type SelfProfileAfterPassCallback = unsafe extern "C" fn(*mut c_void);
+pub(crate) type SelfProfileAfterPassCallback = unsafe extern "C" fn(*mut c_void);
 
-pub type GetSymbolsCallback = unsafe extern "C" fn(*mut c_void, *const c_char) -> *mut c_void;
-pub type GetSymbolsErrorCallback = unsafe extern "C" fn(*const c_char) -> *mut c_void;
+pub(crate) type GetSymbolsCallback =
+    unsafe extern "C" fn(*mut c_void, *const c_char) -> *mut c_void;
+pub(crate) type GetSymbolsErrorCallback = unsafe extern "C" fn(*const c_char) -> *mut c_void;
+
+#[derive(Copy, Clone)]
+#[repr(transparent)]
+pub(crate) struct MetadataKindId(c_uint);
+
+impl From<MetadataType> for MetadataKindId {
+    fn from(value: MetadataType) -> Self {
+        Self(value as c_uint)
+    }
+}
 
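Note: `MetadataKindId` newtypes the raw `c_uint` kind ID, so an ID can only be obtained from `LLVMGetMDKindIDInContext` (or from a well-known `MetadataType` via the `From` impl above) and can only be passed to declarations that expect it, such as `LLVMSetMetadata`. A minimal sketch, assuming `cx`, `instr` and `md_node` are valid handles from the surrounding codegen context:

    // Sketch only: `cx: &Context`, `instr: &Value` and `md_node: &Value` are assumptions.
    let name = c"custom.kind";
    let kind_id: MetadataKindId = unsafe {
        LLVMGetMDKindIDInContext(cx, name.as_ptr(), name.to_bytes().len() as c_uint)
    };
    // `LLVMSetMetadata` is declared `safe` in this diff, so no `unsafe` block is needed.
    LLVMSetMetadata(instr, kind_id, md_node);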
 unsafe extern "C" {
     // Create and destroy contexts.
-    pub fn LLVMContextDispose(C: &'static mut Context);
-    pub fn LLVMGetMDKindIDInContext(C: &Context, Name: *const c_char, SLen: c_uint) -> c_uint;
+    pub(crate) fn LLVMContextDispose(C: &'static mut Context);
+    pub(crate) fn LLVMGetMDKindIDInContext(
+        C: &Context,
+        Name: *const c_char,
+        SLen: c_uint,
+    ) -> MetadataKindId;
 
     // Create modules.
-    pub fn LLVMModuleCreateWithNameInContext(ModuleID: *const c_char, C: &Context) -> &Module;
-    pub fn LLVMGetModuleContext(M: &Module) -> &Context;
-    pub fn LLVMCloneModule(M: &Module) -> &Module;
+    pub(crate) fn LLVMModuleCreateWithNameInContext(
+        ModuleID: *const c_char,
+        C: &Context,
+    ) -> &Module;
+    pub(crate) fn LLVMCloneModule(M: &Module) -> &Module;
 
     /// Data layout. See Module::getDataLayout.
-    pub fn LLVMGetDataLayoutStr(M: &Module) -> *const c_char;
-    pub fn LLVMSetDataLayout(M: &Module, Triple: *const c_char);
+    pub(crate) fn LLVMGetDataLayoutStr(M: &Module) -> *const c_char;
+    pub(crate) fn LLVMSetDataLayout(M: &Module, Triple: *const c_char);
 
     /// See Module::setModuleInlineAsm.
-    pub fn LLVMAppendModuleInlineAsm(M: &Module, Asm: *const c_char, Len: size_t);
+    pub(crate) fn LLVMAppendModuleInlineAsm(M: &Module, Asm: *const c_char, Len: size_t);
 
     // Operations on integer types
-    pub fn LLVMInt1TypeInContext(C: &Context) -> &Type;
-    pub fn LLVMInt8TypeInContext(C: &Context) -> &Type;
-    pub fn LLVMInt16TypeInContext(C: &Context) -> &Type;
-    pub fn LLVMInt32TypeInContext(C: &Context) -> &Type;
-    pub fn LLVMInt64TypeInContext(C: &Context) -> &Type;
-    pub fn LLVMIntTypeInContext(C: &Context, NumBits: c_uint) -> &Type;
+    pub(crate) fn LLVMInt1TypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMInt8TypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMInt16TypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMInt32TypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMInt64TypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMIntTypeInContext(C: &Context, NumBits: c_uint) -> &Type;
 
-    pub fn LLVMGetIntTypeWidth(IntegerTy: &Type) -> c_uint;
+    pub(crate) fn LLVMGetIntTypeWidth(IntegerTy: &Type) -> c_uint;
 
     // Operations on real types
-    pub fn LLVMHalfTypeInContext(C: &Context) -> &Type;
-    pub fn LLVMFloatTypeInContext(C: &Context) -> &Type;
-    pub fn LLVMDoubleTypeInContext(C: &Context) -> &Type;
-    pub fn LLVMFP128TypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMHalfTypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMFloatTypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMDoubleTypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMFP128TypeInContext(C: &Context) -> &Type;
 
     // Operations on function types
-    pub fn LLVMFunctionType<'a>(
+    pub(crate) fn LLVMFunctionType<'a>(
         ReturnType: &'a Type,
         ParamTypes: *const &'a Type,
         ParamCount: c_uint,
         IsVarArg: Bool,
     ) -> &'a Type;
-    pub fn LLVMCountParamTypes(FunctionTy: &Type) -> c_uint;
-    pub fn LLVMGetParamTypes<'a>(FunctionTy: &'a Type, Dest: *mut &'a Type);
+    pub(crate) fn LLVMCountParamTypes(FunctionTy: &Type) -> c_uint;
+    pub(crate) fn LLVMGetParamTypes<'a>(FunctionTy: &'a Type, Dest: *mut &'a Type);
 
     // Operations on struct types
-    pub fn LLVMStructTypeInContext<'a>(
+    pub(crate) fn LLVMStructTypeInContext<'a>(
         C: &'a Context,
         ElementTypes: *const &'a Type,
         ElementCount: c_uint,
@@ -1043,111 +1052,122 @@ unsafe extern "C" {
     ) -> &'a Type;
 
     // Operations on array, pointer, and vector types (sequence types)
-    pub fn LLVMPointerTypeInContext(C: &Context, AddressSpace: c_uint) -> &Type;
-    pub fn LLVMVectorType(ElementType: &Type, ElementCount: c_uint) -> &Type;
+    pub(crate) fn LLVMPointerTypeInContext(C: &Context, AddressSpace: c_uint) -> &Type;
+    pub(crate) fn LLVMVectorType(ElementType: &Type, ElementCount: c_uint) -> &Type;
 
-    pub fn LLVMGetElementType(Ty: &Type) -> &Type;
-    pub fn LLVMGetVectorSize(VectorTy: &Type) -> c_uint;
+    pub(crate) fn LLVMGetElementType(Ty: &Type) -> &Type;
+    pub(crate) fn LLVMGetVectorSize(VectorTy: &Type) -> c_uint;
 
     // Operations on other types
-    pub fn LLVMVoidTypeInContext(C: &Context) -> &Type;
-    pub fn LLVMTokenTypeInContext(C: &Context) -> &Type;
-    pub fn LLVMMetadataTypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMVoidTypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMTokenTypeInContext(C: &Context) -> &Type;
+    pub(crate) fn LLVMMetadataTypeInContext(C: &Context) -> &Type;
 
     // Operations on all values
-    pub fn LLVMIsUndef(Val: &Value) -> Bool;
-    pub fn LLVMTypeOf(Val: &Value) -> &Type;
-    pub fn LLVMGetValueName2(Val: &Value, Length: *mut size_t) -> *const c_char;
-    pub fn LLVMSetValueName2(Val: &Value, Name: *const c_char, NameLen: size_t);
-    pub fn LLVMReplaceAllUsesWith<'a>(OldVal: &'a Value, NewVal: &'a Value);
-    pub fn LLVMSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Node: &'a Value);
-    pub fn LLVMGlobalSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Metadata: &'a Metadata);
-    pub fn LLVMValueAsMetadata(Node: &Value) -> &Metadata;
+    pub(crate) fn LLVMTypeOf(Val: &Value) -> &Type;
+    pub(crate) fn LLVMGetValueName2(Val: &Value, Length: *mut size_t) -> *const c_char;
+    pub(crate) fn LLVMSetValueName2(Val: &Value, Name: *const c_char, NameLen: size_t);
+    pub(crate) fn LLVMReplaceAllUsesWith<'a>(OldVal: &'a Value, NewVal: &'a Value);
+    pub(crate) safe fn LLVMSetMetadata<'a>(Val: &'a Value, KindID: MetadataKindId, Node: &'a Value);
+    pub(crate) fn LLVMGlobalSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Metadata: &'a Metadata);
+    pub(crate) safe fn LLVMValueAsMetadata(Node: &Value) -> &Metadata;
 
     // Operations on constants of any type
-    pub fn LLVMConstNull(Ty: &Type) -> &Value;
-    pub fn LLVMGetUndef(Ty: &Type) -> &Value;
-    pub fn LLVMGetPoison(Ty: &Type) -> &Value;
+    pub(crate) fn LLVMConstNull(Ty: &Type) -> &Value;
+    pub(crate) fn LLVMGetUndef(Ty: &Type) -> &Value;
+    pub(crate) fn LLVMGetPoison(Ty: &Type) -> &Value;
 
     // Operations on metadata
-    pub fn LLVMMDStringInContext2(C: &Context, Str: *const c_char, SLen: size_t) -> &Metadata;
-    pub fn LLVMMDNodeInContext2<'a>(
+    pub(crate) fn LLVMMDStringInContext2(
+        C: &Context,
+        Str: *const c_char,
+        SLen: size_t,
+    ) -> &Metadata;
+    pub(crate) fn LLVMMDNodeInContext2<'a>(
         C: &'a Context,
         Vals: *const &'a Metadata,
         Count: size_t,
     ) -> &'a Metadata;
-    pub fn LLVMAddNamedMetadataOperand<'a>(M: &'a Module, Name: *const c_char, Val: &'a Value);
+    pub(crate) fn LLVMAddNamedMetadataOperand<'a>(
+        M: &'a Module,
+        Name: *const c_char,
+        Val: &'a Value,
+    );
 
     // Operations on scalar constants
-    pub fn LLVMConstInt(IntTy: &Type, N: c_ulonglong, SignExtend: Bool) -> &Value;
-    pub fn LLVMConstIntOfArbitraryPrecision(IntTy: &Type, Wn: c_uint, Ws: *const u64) -> &Value;
-    pub fn LLVMConstReal(RealTy: &Type, N: f64) -> &Value;
+    pub(crate) fn LLVMConstInt(IntTy: &Type, N: c_ulonglong, SignExtend: Bool) -> &Value;
+    pub(crate) fn LLVMConstIntOfArbitraryPrecision(
+        IntTy: &Type,
+        Wn: c_uint,
+        Ws: *const u64,
+    ) -> &Value;
+    pub(crate) fn LLVMConstReal(RealTy: &Type, N: f64) -> &Value;
 
     // Operations on composite constants
-    pub fn LLVMConstArray2<'a>(
+    pub(crate) fn LLVMConstArray2<'a>(
         ElementTy: &'a Type,
         ConstantVals: *const &'a Value,
         Length: u64,
     ) -> &'a Value;
-    pub fn LLVMArrayType2(ElementType: &Type, ElementCount: u64) -> &Type;
-    pub fn LLVMConstStringInContext2(
+    pub(crate) fn LLVMArrayType2(ElementType: &Type, ElementCount: u64) -> &Type;
+    pub(crate) fn LLVMConstStringInContext2(
         C: &Context,
         Str: *const c_char,
         Length: size_t,
         DontNullTerminate: Bool,
     ) -> &Value;
-    pub fn LLVMConstStructInContext<'a>(
+    pub(crate) fn LLVMConstStructInContext<'a>(
         C: &'a Context,
         ConstantVals: *const &'a Value,
         Count: c_uint,
         Packed: Bool,
     ) -> &'a Value;
-    pub fn LLVMConstVector(ScalarConstantVals: *const &Value, Size: c_uint) -> &Value;
+    pub(crate) fn LLVMConstVector(ScalarConstantVals: *const &Value, Size: c_uint) -> &Value;
 
     // Constant expressions
-    pub fn LLVMConstInBoundsGEP2<'a>(
+    pub(crate) fn LLVMConstInBoundsGEP2<'a>(
         ty: &'a Type,
         ConstantVal: &'a Value,
         ConstantIndices: *const &'a Value,
         NumIndices: c_uint,
     ) -> &'a Value;
-    pub fn LLVMConstPtrToInt<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
-    pub fn LLVMConstIntToPtr<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
-    pub fn LLVMConstBitCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
-    pub fn LLVMConstPointerCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
-    pub fn LLVMGetAggregateElement(ConstantVal: &Value, Idx: c_uint) -> Option<&Value>;
-    pub fn LLVMGetConstOpcode(ConstantVal: &Value) -> Opcode;
-    pub fn LLVMIsAConstantExpr(Val: &Value) -> Option<&Value>;
+    pub(crate) fn LLVMConstPtrToInt<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
+    pub(crate) fn LLVMConstIntToPtr<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
+    pub(crate) fn LLVMConstBitCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
+    pub(crate) fn LLVMConstPointerCast<'a>(ConstantVal: &'a Value, ToType: &'a Type) -> &'a Value;
+    pub(crate) fn LLVMGetAggregateElement(ConstantVal: &Value, Idx: c_uint) -> Option<&Value>;
+    pub(crate) fn LLVMGetConstOpcode(ConstantVal: &Value) -> Opcode;
+    pub(crate) fn LLVMIsAConstantExpr(Val: &Value) -> Option<&Value>;
 
     // Operations on global variables, functions, and aliases (globals)
-    pub fn LLVMIsDeclaration(Global: &Value) -> Bool;
-    pub fn LLVMGetLinkage(Global: &Value) -> RawEnum<Linkage>;
-    pub fn LLVMSetLinkage(Global: &Value, RustLinkage: Linkage);
-    pub fn LLVMSetSection(Global: &Value, Section: *const c_char);
-    pub fn LLVMGetVisibility(Global: &Value) -> RawEnum<Visibility>;
-    pub fn LLVMSetVisibility(Global: &Value, Viz: Visibility);
-    pub fn LLVMGetAlignment(Global: &Value) -> c_uint;
-    pub fn LLVMSetAlignment(Global: &Value, Bytes: c_uint);
-    pub fn LLVMSetDLLStorageClass(V: &Value, C: DLLStorageClass);
-    pub fn LLVMGlobalGetValueType(Global: &Value) -> &Type;
+    pub(crate) fn LLVMIsDeclaration(Global: &Value) -> Bool;
+    pub(crate) fn LLVMGetLinkage(Global: &Value) -> RawEnum<Linkage>;
+    pub(crate) fn LLVMSetLinkage(Global: &Value, RustLinkage: Linkage);
+    pub(crate) fn LLVMSetSection(Global: &Value, Section: *const c_char);
+    pub(crate) fn LLVMGetVisibility(Global: &Value) -> RawEnum<Visibility>;
+    pub(crate) fn LLVMSetVisibility(Global: &Value, Viz: Visibility);
+    pub(crate) fn LLVMGetAlignment(Global: &Value) -> c_uint;
+    pub(crate) fn LLVMSetAlignment(Global: &Value, Bytes: c_uint);
+    pub(crate) fn LLVMSetDLLStorageClass(V: &Value, C: DLLStorageClass);
+    pub(crate) fn LLVMGlobalGetValueType(Global: &Value) -> &Type;
 
     // Operations on global variables
-    pub fn LLVMIsAGlobalVariable(GlobalVar: &Value) -> Option<&Value>;
-    pub fn LLVMAddGlobal<'a>(M: &'a Module, Ty: &'a Type, Name: *const c_char) -> &'a Value;
-    pub fn LLVMGetNamedGlobal(M: &Module, Name: *const c_char) -> Option<&Value>;
-    pub fn LLVMGetFirstGlobal(M: &Module) -> Option<&Value>;
-    pub fn LLVMGetNextGlobal(GlobalVar: &Value) -> Option<&Value>;
-    pub fn LLVMDeleteGlobal(GlobalVar: &Value);
-    pub fn LLVMGetInitializer(GlobalVar: &Value) -> Option<&Value>;
-    pub fn LLVMSetInitializer<'a>(GlobalVar: &'a Value, ConstantVal: &'a Value);
-    pub fn LLVMIsThreadLocal(GlobalVar: &Value) -> Bool;
-    pub fn LLVMSetThreadLocalMode(GlobalVar: &Value, Mode: ThreadLocalMode);
-    pub fn LLVMIsGlobalConstant(GlobalVar: &Value) -> Bool;
-    pub fn LLVMSetGlobalConstant(GlobalVar: &Value, IsConstant: Bool);
-    pub fn LLVMSetTailCall(CallInst: &Value, IsTailCall: Bool);
+    pub(crate) fn LLVMIsAGlobalVariable(GlobalVar: &Value) -> Option<&Value>;
+    pub(crate) fn LLVMAddGlobal<'a>(M: &'a Module, Ty: &'a Type, Name: *const c_char) -> &'a Value;
+    pub(crate) fn LLVMGetNamedGlobal(M: &Module, Name: *const c_char) -> Option<&Value>;
+    pub(crate) fn LLVMGetFirstGlobal(M: &Module) -> Option<&Value>;
+    pub(crate) fn LLVMGetNextGlobal(GlobalVar: &Value) -> Option<&Value>;
+    pub(crate) fn LLVMDeleteGlobal(GlobalVar: &Value);
+    pub(crate) fn LLVMGetInitializer(GlobalVar: &Value) -> Option<&Value>;
+    pub(crate) fn LLVMSetInitializer<'a>(GlobalVar: &'a Value, ConstantVal: &'a Value);
+    pub(crate) fn LLVMIsThreadLocal(GlobalVar: &Value) -> Bool;
+    pub(crate) fn LLVMSetThreadLocalMode(GlobalVar: &Value, Mode: ThreadLocalMode);
+    pub(crate) fn LLVMIsGlobalConstant(GlobalVar: &Value) -> Bool;
+    pub(crate) fn LLVMSetGlobalConstant(GlobalVar: &Value, IsConstant: Bool);
+    pub(crate) safe fn LLVMSetTailCall(CallInst: &Value, IsTailCall: Bool);
 
     // Operations on attributes
-    pub fn LLVMCreateStringAttribute(
+    pub(crate) fn LLVMCreateStringAttribute(
         C: &Context,
         Name: *const c_char,
         NameLen: c_uint,
@@ -1156,34 +1176,34 @@ unsafe extern "C" {
     ) -> &Attribute;
 
     // Operations on functions
-    pub fn LLVMSetFunctionCallConv(Fn: &Value, CC: c_uint);
+    pub(crate) fn LLVMSetFunctionCallConv(Fn: &Value, CC: c_uint);
 
     // Operations on parameters
-    pub fn LLVMIsAArgument(Val: &Value) -> Option<&Value>;
-    pub fn LLVMCountParams(Fn: &Value) -> c_uint;
-    pub fn LLVMGetParam(Fn: &Value, Index: c_uint) -> &Value;
+    pub(crate) fn LLVMIsAArgument(Val: &Value) -> Option<&Value>;
+    pub(crate) safe fn LLVMCountParams(Fn: &Value) -> c_uint;
+    pub(crate) fn LLVMGetParam(Fn: &Value, Index: c_uint) -> &Value;
 
     // Operations on basic blocks
-    pub fn LLVMGetBasicBlockParent(BB: &BasicBlock) -> &Value;
-    pub fn LLVMAppendBasicBlockInContext<'a>(
+    pub(crate) fn LLVMGetBasicBlockParent(BB: &BasicBlock) -> &Value;
+    pub(crate) fn LLVMAppendBasicBlockInContext<'a>(
         C: &'a Context,
         Fn: &'a Value,
         Name: *const c_char,
     ) -> &'a BasicBlock;
 
     // Operations on instructions
-    pub fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>;
-    pub fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock;
-    pub fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>;
+    pub(crate) fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>;
+    pub(crate) fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock;
+    pub(crate) fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>;
 
     // Operations on call sites
-    pub fn LLVMSetInstructionCallConv(Instr: &Value, CC: c_uint);
+    pub(crate) fn LLVMSetInstructionCallConv(Instr: &Value, CC: c_uint);
 
     // Operations on load/store instructions (only)
-    pub fn LLVMSetVolatile(MemoryAccessInst: &Value, volatile: Bool);
+    pub(crate) fn LLVMSetVolatile(MemoryAccessInst: &Value, volatile: Bool);
 
     // Operations on phi nodes
-    pub fn LLVMAddIncoming<'a>(
+    pub(crate) fn LLVMAddIncoming<'a>(
         PhiNode: &'a Value,
         IncomingValues: *const &'a Value,
         IncomingBlocks: *const &'a BasicBlock,
@@ -1191,362 +1211,367 @@ unsafe extern "C" {
     );
 
     // Instruction builders
-    pub fn LLVMCreateBuilderInContext(C: &Context) -> &mut Builder<'_>;
-    pub fn LLVMPositionBuilderAtEnd<'a>(Builder: &Builder<'a>, Block: &'a BasicBlock);
-    pub fn LLVMGetInsertBlock<'a>(Builder: &Builder<'a>) -> &'a BasicBlock;
-    pub fn LLVMDisposeBuilder<'a>(Builder: &'a mut Builder<'a>);
+    pub(crate) fn LLVMCreateBuilderInContext(C: &Context) -> &mut Builder<'_>;
+    pub(crate) fn LLVMPositionBuilderAtEnd<'a>(Builder: &Builder<'a>, Block: &'a BasicBlock);
+    pub(crate) fn LLVMGetInsertBlock<'a>(Builder: &Builder<'a>) -> &'a BasicBlock;
+    pub(crate) fn LLVMDisposeBuilder<'a>(Builder: &'a mut Builder<'a>);
 
     // Metadata
-    pub fn LLVMSetCurrentDebugLocation2<'a>(Builder: &Builder<'a>, Loc: *const Metadata);
-    pub fn LLVMGetCurrentDebugLocation2<'a>(Builder: &Builder<'a>) -> Option<&'a Metadata>;
+    pub(crate) fn LLVMSetCurrentDebugLocation2<'a>(Builder: &Builder<'a>, Loc: *const Metadata);
+    pub(crate) fn LLVMGetCurrentDebugLocation2<'a>(Builder: &Builder<'a>) -> Option<&'a Metadata>;
 
     // Terminators
-    pub fn LLVMBuildRetVoid<'a>(B: &Builder<'a>) -> &'a Value;
-    pub fn LLVMBuildRet<'a>(B: &Builder<'a>, V: &'a Value) -> &'a Value;
-    pub fn LLVMBuildBr<'a>(B: &Builder<'a>, Dest: &'a BasicBlock) -> &'a Value;
-    pub fn LLVMBuildCondBr<'a>(
+    pub(crate) safe fn LLVMBuildRetVoid<'a>(B: &Builder<'a>) -> &'a Value;
+    pub(crate) fn LLVMBuildRet<'a>(B: &Builder<'a>, V: &'a Value) -> &'a Value;
+    pub(crate) fn LLVMBuildBr<'a>(B: &Builder<'a>, Dest: &'a BasicBlock) -> &'a Value;
+    pub(crate) fn LLVMBuildCondBr<'a>(
         B: &Builder<'a>,
         If: &'a Value,
         Then: &'a BasicBlock,
         Else: &'a BasicBlock,
     ) -> &'a Value;
-    pub fn LLVMBuildSwitch<'a>(
+    pub(crate) fn LLVMBuildSwitch<'a>(
         B: &Builder<'a>,
         V: &'a Value,
         Else: &'a BasicBlock,
         NumCases: c_uint,
     ) -> &'a Value;
-    pub fn LLVMBuildLandingPad<'a>(
+    pub(crate) fn LLVMBuildLandingPad<'a>(
         B: &Builder<'a>,
         Ty: &'a Type,
         PersFn: Option<&'a Value>,
         NumClauses: c_uint,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildResume<'a>(B: &Builder<'a>, Exn: &'a Value) -> &'a Value;
-    pub fn LLVMBuildUnreachable<'a>(B: &Builder<'a>) -> &'a Value;
+    pub(crate) fn LLVMBuildResume<'a>(B: &Builder<'a>, Exn: &'a Value) -> &'a Value;
+    pub(crate) fn LLVMBuildUnreachable<'a>(B: &Builder<'a>) -> &'a Value;
 
-    pub fn LLVMBuildCleanupPad<'a>(
+    pub(crate) fn LLVMBuildCleanupPad<'a>(
         B: &Builder<'a>,
         ParentPad: Option<&'a Value>,
         Args: *const &'a Value,
         NumArgs: c_uint,
         Name: *const c_char,
     ) -> Option<&'a Value>;
-    pub fn LLVMBuildCleanupRet<'a>(
+    pub(crate) fn LLVMBuildCleanupRet<'a>(
         B: &Builder<'a>,
         CleanupPad: &'a Value,
         BB: Option<&'a BasicBlock>,
     ) -> Option<&'a Value>;
-    pub fn LLVMBuildCatchPad<'a>(
+    pub(crate) fn LLVMBuildCatchPad<'a>(
         B: &Builder<'a>,
         ParentPad: &'a Value,
         Args: *const &'a Value,
         NumArgs: c_uint,
         Name: *const c_char,
     ) -> Option<&'a Value>;
-    pub fn LLVMBuildCatchRet<'a>(
+    pub(crate) fn LLVMBuildCatchRet<'a>(
         B: &Builder<'a>,
         CatchPad: &'a Value,
         BB: &'a BasicBlock,
     ) -> Option<&'a Value>;
-    pub fn LLVMBuildCatchSwitch<'a>(
+    pub(crate) fn LLVMBuildCatchSwitch<'a>(
         Builder: &Builder<'a>,
         ParentPad: Option<&'a Value>,
         UnwindBB: Option<&'a BasicBlock>,
         NumHandlers: c_uint,
         Name: *const c_char,
     ) -> Option<&'a Value>;
-    pub fn LLVMAddHandler<'a>(CatchSwitch: &'a Value, Dest: &'a BasicBlock);
-    pub fn LLVMSetPersonalityFn<'a>(Func: &'a Value, Pers: &'a Value);
+    pub(crate) fn LLVMAddHandler<'a>(CatchSwitch: &'a Value, Dest: &'a BasicBlock);
+    pub(crate) fn LLVMSetPersonalityFn<'a>(Func: &'a Value, Pers: &'a Value);
 
     // Add a case to the switch instruction
-    pub fn LLVMAddCase<'a>(Switch: &'a Value, OnVal: &'a Value, Dest: &'a BasicBlock);
+    pub(crate) fn LLVMAddCase<'a>(Switch: &'a Value, OnVal: &'a Value, Dest: &'a BasicBlock);
 
     // Add a clause to the landing pad instruction
-    pub fn LLVMAddClause<'a>(LandingPad: &'a Value, ClauseVal: &'a Value);
+    pub(crate) fn LLVMAddClause<'a>(LandingPad: &'a Value, ClauseVal: &'a Value);
 
     // Set the cleanup on a landing pad instruction
-    pub fn LLVMSetCleanup(LandingPad: &Value, Val: Bool);
+    pub(crate) fn LLVMSetCleanup(LandingPad: &Value, Val: Bool);
 
     // Arithmetic
-    pub fn LLVMBuildAdd<'a>(
+    pub(crate) fn LLVMBuildAdd<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFAdd<'a>(
+    pub(crate) fn LLVMBuildFAdd<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildSub<'a>(
+    pub(crate) fn LLVMBuildSub<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFSub<'a>(
+    pub(crate) fn LLVMBuildFSub<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildMul<'a>(
+    pub(crate) fn LLVMBuildMul<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFMul<'a>(
+    pub(crate) fn LLVMBuildFMul<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildUDiv<'a>(
+    pub(crate) fn LLVMBuildUDiv<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildExactUDiv<'a>(
+    pub(crate) fn LLVMBuildExactUDiv<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildSDiv<'a>(
+    pub(crate) fn LLVMBuildSDiv<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildExactSDiv<'a>(
+    pub(crate) fn LLVMBuildExactSDiv<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFDiv<'a>(
+    pub(crate) fn LLVMBuildFDiv<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildURem<'a>(
+    pub(crate) fn LLVMBuildURem<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildSRem<'a>(
+    pub(crate) fn LLVMBuildSRem<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFRem<'a>(
+    pub(crate) fn LLVMBuildFRem<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildShl<'a>(
+    pub(crate) fn LLVMBuildShl<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildLShr<'a>(
+    pub(crate) fn LLVMBuildLShr<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildAShr<'a>(
+    pub(crate) fn LLVMBuildAShr<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildNSWAdd<'a>(
+    pub(crate) fn LLVMBuildNSWAdd<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildNUWAdd<'a>(
+    pub(crate) fn LLVMBuildNUWAdd<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildNSWSub<'a>(
+    pub(crate) fn LLVMBuildNSWSub<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildNUWSub<'a>(
+    pub(crate) fn LLVMBuildNUWSub<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildNSWMul<'a>(
+    pub(crate) fn LLVMBuildNSWMul<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildNUWMul<'a>(
+    pub(crate) fn LLVMBuildNUWMul<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildAnd<'a>(
+    pub(crate) fn LLVMBuildAnd<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildOr<'a>(
+    pub(crate) fn LLVMBuildOr<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildXor<'a>(
+    pub(crate) fn LLVMBuildXor<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildNeg<'a>(B: &Builder<'a>, V: &'a Value, Name: *const c_char) -> &'a Value;
-    pub fn LLVMBuildFNeg<'a>(B: &Builder<'a>, V: &'a Value, Name: *const c_char) -> &'a Value;
-    pub fn LLVMBuildNot<'a>(B: &Builder<'a>, V: &'a Value, Name: *const c_char) -> &'a Value;
+    pub(crate) fn LLVMBuildNeg<'a>(B: &Builder<'a>, V: &'a Value, Name: *const c_char)
+    -> &'a Value;
+    pub(crate) fn LLVMBuildFNeg<'a>(
+        B: &Builder<'a>,
+        V: &'a Value,
+        Name: *const c_char,
+    ) -> &'a Value;
+    pub(crate) fn LLVMBuildNot<'a>(B: &Builder<'a>, V: &'a Value, Name: *const c_char)
+    -> &'a Value;
 
     // Extra flags on arithmetic
-    pub fn LLVMSetIsDisjoint(Instr: &Value, IsDisjoint: Bool);
+    pub(crate) fn LLVMSetIsDisjoint(Instr: &Value, IsDisjoint: Bool);
+    pub(crate) fn LLVMSetNUW(ArithInst: &Value, HasNUW: Bool);
+    pub(crate) fn LLVMSetNSW(ArithInst: &Value, HasNSW: Bool);
 
     // Memory
-    pub fn LLVMBuildAlloca<'a>(B: &Builder<'a>, Ty: &'a Type, Name: *const c_char) -> &'a Value;
-    pub fn LLVMBuildArrayAlloca<'a>(
+    pub(crate) fn LLVMBuildAlloca<'a>(
         B: &Builder<'a>,
         Ty: &'a Type,
-        Val: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildLoad2<'a>(
+    pub(crate) fn LLVMBuildArrayAlloca<'a>(
         B: &Builder<'a>,
         Ty: &'a Type,
-        PointerVal: &'a Value,
+        Val: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-
-    pub fn LLVMBuildStore<'a>(B: &Builder<'a>, Val: &'a Value, Ptr: &'a Value) -> &'a Value;
-
-    pub fn LLVMBuildGEP2<'a>(
+    pub(crate) fn LLVMBuildLoad2<'a>(
         B: &Builder<'a>,
         Ty: &'a Type,
-        Pointer: &'a Value,
-        Indices: *const &'a Value,
-        NumIndices: c_uint,
+        PointerVal: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildInBoundsGEP2<'a>(
+
+    pub(crate) fn LLVMBuildStore<'a>(B: &Builder<'a>, Val: &'a Value, Ptr: &'a Value) -> &'a Value;
+
+    pub(crate) fn LLVMBuildGEPWithNoWrapFlags<'a>(
         B: &Builder<'a>,
         Ty: &'a Type,
         Pointer: &'a Value,
         Indices: *const &'a Value,
         NumIndices: c_uint,
         Name: *const c_char,
+        Flags: GEPNoWrapFlags,
     ) -> &'a Value;
 
     // Casts
-    pub fn LLVMBuildTrunc<'a>(
+    pub(crate) fn LLVMBuildTrunc<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildZExt<'a>(
+    pub(crate) fn LLVMBuildZExt<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildSExt<'a>(
+    pub(crate) fn LLVMBuildSExt<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFPToUI<'a>(
+    pub(crate) fn LLVMBuildFPToUI<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFPToSI<'a>(
+    pub(crate) fn LLVMBuildFPToSI<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildUIToFP<'a>(
+    pub(crate) fn LLVMBuildUIToFP<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildSIToFP<'a>(
+    pub(crate) fn LLVMBuildSIToFP<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFPTrunc<'a>(
+    pub(crate) fn LLVMBuildFPTrunc<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFPExt<'a>(
+    pub(crate) fn LLVMBuildFPExt<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildPtrToInt<'a>(
+    pub(crate) fn LLVMBuildPtrToInt<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildIntToPtr<'a>(
+    pub(crate) fn LLVMBuildIntToPtr<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildBitCast<'a>(
+    pub(crate) fn LLVMBuildBitCast<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildPointerCast<'a>(
+    pub(crate) fn LLVMBuildPointerCast<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildIntCast2<'a>(
+    pub(crate) fn LLVMBuildIntCast2<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         DestTy: &'a Type,
@@ -1555,14 +1580,14 @@ unsafe extern "C" {
     ) -> &'a Value;
 
     // Comparisons
-    pub fn LLVMBuildICmp<'a>(
+    pub(crate) fn LLVMBuildICmp<'a>(
         B: &Builder<'a>,
         Op: c_uint,
         LHS: &'a Value,
         RHS: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildFCmp<'a>(
+    pub(crate) fn LLVMBuildFCmp<'a>(
         B: &Builder<'a>,
         Op: c_uint,
         LHS: &'a Value,
@@ -1571,47 +1596,48 @@ unsafe extern "C" {
     ) -> &'a Value;
 
     // Miscellaneous instructions
-    pub fn LLVMBuildPhi<'a>(B: &Builder<'a>, Ty: &'a Type, Name: *const c_char) -> &'a Value;
-    pub fn LLVMBuildSelect<'a>(
+    pub(crate) fn LLVMBuildPhi<'a>(B: &Builder<'a>, Ty: &'a Type, Name: *const c_char)
+    -> &'a Value;
+    pub(crate) fn LLVMBuildSelect<'a>(
         B: &Builder<'a>,
         If: &'a Value,
         Then: &'a Value,
         Else: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildVAArg<'a>(
+    pub(crate) fn LLVMBuildVAArg<'a>(
         B: &Builder<'a>,
         list: &'a Value,
         Ty: &'a Type,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildExtractElement<'a>(
+    pub(crate) fn LLVMBuildExtractElement<'a>(
         B: &Builder<'a>,
         VecVal: &'a Value,
         Index: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildInsertElement<'a>(
+    pub(crate) fn LLVMBuildInsertElement<'a>(
         B: &Builder<'a>,
         VecVal: &'a Value,
         EltVal: &'a Value,
         Index: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildShuffleVector<'a>(
+    pub(crate) fn LLVMBuildShuffleVector<'a>(
         B: &Builder<'a>,
         V1: &'a Value,
         V2: &'a Value,
         Mask: &'a Value,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildExtractValue<'a>(
+    pub(crate) fn LLVMBuildExtractValue<'a>(
         B: &Builder<'a>,
         AggVal: &'a Value,
         Index: c_uint,
         Name: *const c_char,
     ) -> &'a Value;
-    pub fn LLVMBuildInsertValue<'a>(
+    pub(crate) fn LLVMBuildInsertValue<'a>(
         B: &Builder<'a>,
         AggVal: &'a Value,
         EltVal: &'a Value,
@@ -1620,7 +1646,7 @@ unsafe extern "C" {
     ) -> &'a Value;
 
     // Atomic Operations
-    pub fn LLVMBuildAtomicCmpXchg<'a>(
+    pub(crate) fn LLVMBuildAtomicCmpXchg<'a>(
         B: &Builder<'a>,
         LHS: &'a Value,
         CMP: &'a Value,
@@ -1630,9 +1656,9 @@ unsafe extern "C" {
         SingleThreaded: Bool,
     ) -> &'a Value;
 
-    pub fn LLVMSetWeak(CmpXchgInst: &Value, IsWeak: Bool);
+    pub(crate) fn LLVMSetWeak(CmpXchgInst: &Value, IsWeak: Bool);
 
-    pub fn LLVMBuildAtomicRMW<'a>(
+    pub(crate) fn LLVMBuildAtomicRMW<'a>(
         B: &Builder<'a>,
         Op: AtomicRmwBinOp,
         LHS: &'a Value,
@@ -1641,7 +1667,7 @@ unsafe extern "C" {
         SingleThreaded: Bool,
     ) -> &'a Value;
 
-    pub fn LLVMBuildFence<'a>(
+    pub(crate) fn LLVMBuildFence<'a>(
         B: &Builder<'a>,
         Order: AtomicOrdering,
         SingleThreaded: Bool,
@@ -1649,36 +1675,36 @@ unsafe extern "C" {
     ) -> &'a Value;
 
     /// Writes a module to the specified path. Returns 0 on success.
-    pub fn LLVMWriteBitcodeToFile(M: &Module, Path: *const c_char) -> c_int;
+    pub(crate) fn LLVMWriteBitcodeToFile(M: &Module, Path: *const c_char) -> c_int;
 
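Note: as the doc comment above says, `LLVMWriteBitcodeToFile` reports success with a zero return value, so callers are expected to check it. A minimal sketch, assuming `module` is a valid `&Module` and the output path is purely illustrative:

    // Sketch only: `module` and the output path are assumptions.
    let path = c"/tmp/example.bc";
    let rc = unsafe { LLVMWriteBitcodeToFile(module, path.as_ptr()) };
    if rc != 0 {
        // Non-zero return means the bitcode could not be written.
    }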
     /// Creates a legacy pass manager -- only used for final codegen.
-    pub fn LLVMCreatePassManager<'a>() -> &'a mut PassManager<'a>;
+    pub(crate) fn LLVMCreatePassManager<'a>() -> &'a mut PassManager<'a>;
 
-    pub fn LLVMAddAnalysisPasses<'a>(T: &'a TargetMachine, PM: &PassManager<'a>);
+    pub(crate) fn LLVMAddAnalysisPasses<'a>(T: &'a TargetMachine, PM: &PassManager<'a>);
 
-    pub fn LLVMGetHostCPUFeatures() -> *mut c_char;
+    pub(crate) fn LLVMGetHostCPUFeatures() -> *mut c_char;
 
-    pub fn LLVMDisposeMessage(message: *mut c_char);
+    pub(crate) fn LLVMDisposeMessage(message: *mut c_char);
 
-    pub fn LLVMIsMultithreaded() -> Bool;
+    pub(crate) fn LLVMIsMultithreaded() -> Bool;
 
-    pub fn LLVMStructCreateNamed(C: &Context, Name: *const c_char) -> &Type;
+    pub(crate) fn LLVMStructCreateNamed(C: &Context, Name: *const c_char) -> &Type;
 
-    pub fn LLVMStructSetBody<'a>(
+    pub(crate) fn LLVMStructSetBody<'a>(
         StructTy: &'a Type,
         ElementTypes: *const &'a Type,
         ElementCount: c_uint,
         Packed: Bool,
     );
 
-    pub fn LLVMMetadataAsValue<'a>(C: &'a Context, MD: &'a Metadata) -> &'a Value;
+    pub(crate) safe fn LLVMMetadataAsValue<'a>(C: &'a Context, MD: &'a Metadata) -> &'a Value;
 
-    pub fn LLVMSetUnnamedAddress(Global: &Value, UnnamedAddr: UnnamedAddr);
+    pub(crate) fn LLVMSetUnnamedAddress(Global: &Value, UnnamedAddr: UnnamedAddr);
 
-    pub fn LLVMIsAConstantInt(value_ref: &Value) -> Option<&ConstantInt>;
+    pub(crate) fn LLVMIsAConstantInt(value_ref: &Value) -> Option<&ConstantInt>;
 
-    pub fn LLVMGetOrInsertComdat(M: &Module, Name: *const c_char) -> &Comdat;
-    pub fn LLVMSetComdat(V: &Value, C: &Comdat);
+    pub(crate) fn LLVMGetOrInsertComdat(M: &Module, Name: *const c_char) -> &Comdat;
+    pub(crate) fn LLVMSetComdat(V: &Value, C: &Comdat);
 
     pub(crate) fn LLVMCreateOperandBundle(
         Tag: *const c_char,
@@ -1771,22 +1797,26 @@ unsafe extern "C" {
 
 #[link(name = "llvm-wrapper", kind = "static")]
 unsafe extern "C" {
-    pub fn LLVMRustInstallErrorHandlers();
-    pub fn LLVMRustDisableSystemDialogsOnCrash();
+    pub(crate) fn LLVMRustInstallErrorHandlers();
+    pub(crate) fn LLVMRustDisableSystemDialogsOnCrash();
 
     // Create and destroy contexts.
-    pub fn LLVMRustContextCreate(shouldDiscardNames: bool) -> &'static mut Context;
+    pub(crate) fn LLVMRustContextCreate(shouldDiscardNames: bool) -> &'static mut Context;
 
     /// See llvm::LLVMTypeKind::getTypeID.
-    pub fn LLVMRustGetTypeKind(Ty: &Type) -> TypeKind;
+    pub(crate) fn LLVMRustGetTypeKind(Ty: &Type) -> TypeKind;
 
     // Operations on all values
-    pub fn LLVMRustGlobalAddMetadata<'a>(Val: &'a Value, KindID: c_uint, Metadata: &'a Metadata);
-    pub fn LLVMRustIsNonGVFunctionPointerTy(Val: &Value) -> bool;
+    pub(crate) fn LLVMRustGlobalAddMetadata<'a>(
+        Val: &'a Value,
+        KindID: c_uint,
+        Metadata: &'a Metadata,
+    );
+    pub(crate) fn LLVMRustIsNonGVFunctionPointerTy(Val: &Value) -> bool;
 
     // Operations on scalar constants
-    pub fn LLVMRustConstIntGetZExtValue(ConstantVal: &ConstantInt, Value: &mut u64) -> bool;
-    pub fn LLVMRustConstInt128Get(
+    pub(crate) fn LLVMRustConstIntGetZExtValue(ConstantVal: &ConstantInt, Value: &mut u64) -> bool;
+    pub(crate) fn LLVMRustConstInt128Get(
         ConstantVal: &ConstantInt,
         SExt: bool,
         high: &mut u64,
@@ -1794,36 +1824,38 @@ unsafe extern "C" {
     ) -> bool;
 
     // Operations on global variables, functions, and aliases (globals)
-    pub fn LLVMRustSetDSOLocal(Global: &Value, is_dso_local: bool);
+    pub(crate) fn LLVMRustSetDSOLocal(Global: &Value, is_dso_local: bool);
 
     // Operations on global variables
-    pub fn LLVMRustGetOrInsertGlobal<'a>(
+    pub(crate) fn LLVMRustGetOrInsertGlobal<'a>(
         M: &'a Module,
         Name: *const c_char,
         NameLen: size_t,
         T: &'a Type,
     ) -> &'a Value;
-    pub fn LLVMRustInsertPrivateGlobal<'a>(M: &'a Module, T: &'a Type) -> &'a Value;
-    pub fn LLVMRustGetNamedValue(
+    pub(crate) fn LLVMRustInsertPrivateGlobal<'a>(M: &'a Module, T: &'a Type) -> &'a Value;
+    pub(crate) fn LLVMRustGetNamedValue(
         M: &Module,
         Name: *const c_char,
         NameLen: size_t,
     ) -> Option<&Value>;
-    pub fn LLVMRustSetTailCallKind(CallInst: &Value, TKC: TailCallKind);
 
     // Operations on attributes
-    pub fn LLVMRustCreateAttrNoValue(C: &Context, attr: AttributeKind) -> &Attribute;
-    pub fn LLVMRustCreateAlignmentAttr(C: &Context, bytes: u64) -> &Attribute;
-    pub fn LLVMRustCreateDereferenceableAttr(C: &Context, bytes: u64) -> &Attribute;
-    pub fn LLVMRustCreateDereferenceableOrNullAttr(C: &Context, bytes: u64) -> &Attribute;
-    pub fn LLVMRustCreateByValAttr<'a>(C: &'a Context, ty: &'a Type) -> &'a Attribute;
-    pub fn LLVMRustCreateStructRetAttr<'a>(C: &'a Context, ty: &'a Type) -> &'a Attribute;
-    pub fn LLVMRustCreateElementTypeAttr<'a>(C: &'a Context, ty: &'a Type) -> &'a Attribute;
-    pub fn LLVMRustCreateUWTableAttr(C: &Context, async_: bool) -> &Attribute;
-    pub fn LLVMRustCreateAllocSizeAttr(C: &Context, size_arg: u32) -> &Attribute;
-    pub fn LLVMRustCreateAllocKindAttr(C: &Context, size_arg: u64) -> &Attribute;
-    pub fn LLVMRustCreateMemoryEffectsAttr(C: &Context, effects: MemoryEffects) -> &Attribute;
-    pub fn LLVMRustCreateRangeAttribute(
+    pub(crate) fn LLVMRustCreateAttrNoValue(C: &Context, attr: AttributeKind) -> &Attribute;
+    pub(crate) fn LLVMRustCreateAlignmentAttr(C: &Context, bytes: u64) -> &Attribute;
+    pub(crate) fn LLVMRustCreateDereferenceableAttr(C: &Context, bytes: u64) -> &Attribute;
+    pub(crate) fn LLVMRustCreateDereferenceableOrNullAttr(C: &Context, bytes: u64) -> &Attribute;
+    pub(crate) fn LLVMRustCreateByValAttr<'a>(C: &'a Context, ty: &'a Type) -> &'a Attribute;
+    pub(crate) fn LLVMRustCreateStructRetAttr<'a>(C: &'a Context, ty: &'a Type) -> &'a Attribute;
+    pub(crate) fn LLVMRustCreateElementTypeAttr<'a>(C: &'a Context, ty: &'a Type) -> &'a Attribute;
+    pub(crate) fn LLVMRustCreateUWTableAttr(C: &Context, async_: bool) -> &Attribute;
+    pub(crate) fn LLVMRustCreateAllocSizeAttr(C: &Context, size_arg: u32) -> &Attribute;
+    pub(crate) fn LLVMRustCreateAllocKindAttr(C: &Context, size_arg: u64) -> &Attribute;
+    pub(crate) fn LLVMRustCreateMemoryEffectsAttr(
+        C: &Context,
+        effects: MemoryEffects,
+    ) -> &Attribute;
+    pub(crate) fn LLVMRustCreateRangeAttribute(
         C: &Context,
         num_bits: c_uint,
         lower_words: *const u64,
@@ -1831,13 +1863,13 @@ unsafe extern "C" {
     ) -> &Attribute;
 
     // Operations on functions
-    pub fn LLVMRustGetOrInsertFunction<'a>(
+    pub(crate) fn LLVMRustGetOrInsertFunction<'a>(
         M: &'a Module,
         Name: *const c_char,
         NameLen: size_t,
         FunctionTy: &'a Type,
     ) -> &'a Value;
-    pub fn LLVMRustAddFunctionAttributes<'a>(
+    pub(crate) fn LLVMRustAddFunctionAttributes<'a>(
         Fn: &'a Value,
         index: c_uint,
         Attrs: *const &'a Attribute,
@@ -1845,19 +1877,19 @@ unsafe extern "C" {
     );
 
     // Operations on call sites
-    pub fn LLVMRustAddCallSiteAttributes<'a>(
+    pub(crate) fn LLVMRustAddCallSiteAttributes<'a>(
         Instr: &'a Value,
         index: c_uint,
         Attrs: *const &'a Attribute,
         AttrsLen: size_t,
     );
 
-    pub fn LLVMRustSetFastMath(Instr: &Value);
-    pub fn LLVMRustSetAlgebraicMath(Instr: &Value);
-    pub fn LLVMRustSetAllowReassoc(Instr: &Value);
+    pub(crate) fn LLVMRustSetFastMath(Instr: &Value);
+    pub(crate) fn LLVMRustSetAlgebraicMath(Instr: &Value);
+    pub(crate) fn LLVMRustSetAllowReassoc(Instr: &Value);
 
     // Miscellaneous instructions
-    pub fn LLVMRustBuildMemCpy<'a>(
+    pub(crate) fn LLVMRustBuildMemCpy<'a>(
         B: &Builder<'a>,
         Dst: &'a Value,
         DstAlign: c_uint,
@@ -1866,7 +1898,7 @@ unsafe extern "C" {
         Size: &'a Value,
         IsVolatile: bool,
     ) -> &'a Value;
-    pub fn LLVMRustBuildMemMove<'a>(
+    pub(crate) fn LLVMRustBuildMemMove<'a>(
         B: &Builder<'a>,
         Dst: &'a Value,
         DstAlign: c_uint,
@@ -1875,7 +1907,7 @@ unsafe extern "C" {
         Size: &'a Value,
         IsVolatile: bool,
     ) -> &'a Value;
-    pub fn LLVMRustBuildMemSet<'a>(
+    pub(crate) fn LLVMRustBuildMemSet<'a>(
         B: &Builder<'a>,
         Dst: &'a Value,
         DstAlign: c_uint,
@@ -1884,47 +1916,55 @@ unsafe extern "C" {
         IsVolatile: bool,
     ) -> &'a Value;
 
-    pub fn LLVMRustBuildVectorReduceFAdd<'a>(
+    pub(crate) fn LLVMRustBuildVectorReduceFAdd<'a>(
         B: &Builder<'a>,
         Acc: &'a Value,
         Src: &'a Value,
     ) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceFMul<'a>(
+    pub(crate) fn LLVMRustBuildVectorReduceFMul<'a>(
         B: &Builder<'a>,
         Acc: &'a Value,
         Src: &'a Value,
     ) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceAdd<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceMul<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceAnd<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceOr<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceXor<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceMin<'a>(
+    pub(crate) fn LLVMRustBuildVectorReduceAdd<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
+    pub(crate) fn LLVMRustBuildVectorReduceMul<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
+    pub(crate) fn LLVMRustBuildVectorReduceAnd<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
+    pub(crate) fn LLVMRustBuildVectorReduceOr<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
+    pub(crate) fn LLVMRustBuildVectorReduceXor<'a>(B: &Builder<'a>, Src: &'a Value) -> &'a Value;
+    pub(crate) fn LLVMRustBuildVectorReduceMin<'a>(
         B: &Builder<'a>,
         Src: &'a Value,
         IsSigned: bool,
     ) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceMax<'a>(
+    pub(crate) fn LLVMRustBuildVectorReduceMax<'a>(
         B: &Builder<'a>,
         Src: &'a Value,
         IsSigned: bool,
     ) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceFMin<'a>(
+    pub(crate) fn LLVMRustBuildVectorReduceFMin<'a>(
         B: &Builder<'a>,
         Src: &'a Value,
         IsNaN: bool,
     ) -> &'a Value;
-    pub fn LLVMRustBuildVectorReduceFMax<'a>(
+    pub(crate) fn LLVMRustBuildVectorReduceFMax<'a>(
         B: &Builder<'a>,
         Src: &'a Value,
         IsNaN: bool,
     ) -> &'a Value;
 
-    pub fn LLVMRustBuildMinNum<'a>(B: &Builder<'a>, LHS: &'a Value, LHS: &'a Value) -> &'a Value;
-    pub fn LLVMRustBuildMaxNum<'a>(B: &Builder<'a>, LHS: &'a Value, LHS: &'a Value) -> &'a Value;
+    pub(crate) fn LLVMRustBuildMinNum<'a>(
+        B: &Builder<'a>,
+        LHS: &'a Value,
+        RHS: &'a Value,
+    ) -> &'a Value;
+    pub(crate) fn LLVMRustBuildMaxNum<'a>(
+        B: &Builder<'a>,
+        LHS: &'a Value,
+        RHS: &'a Value,
+    ) -> &'a Value;
 
     // Atomic Operations
-    pub fn LLVMRustBuildAtomicLoad<'a>(
+    pub(crate) fn LLVMRustBuildAtomicLoad<'a>(
         B: &Builder<'a>,
         ElementType: &'a Type,
         PointerVal: &'a Value,
@@ -1932,21 +1972,21 @@ unsafe extern "C" {
         Order: AtomicOrdering,
     ) -> &'a Value;
 
-    pub fn LLVMRustBuildAtomicStore<'a>(
+    pub(crate) fn LLVMRustBuildAtomicStore<'a>(
         B: &Builder<'a>,
         Val: &'a Value,
         Ptr: &'a Value,
         Order: AtomicOrdering,
     ) -> &'a Value;
 
-    pub fn LLVMRustTimeTraceProfilerInitialize();
+    pub(crate) fn LLVMRustTimeTraceProfilerInitialize();
 
-    pub fn LLVMRustTimeTraceProfilerFinishThread();
+    pub(crate) fn LLVMRustTimeTraceProfilerFinishThread();
 
-    pub fn LLVMRustTimeTraceProfilerFinish(FileName: *const c_char);
+    pub(crate) fn LLVMRustTimeTraceProfilerFinish(FileName: *const c_char);
 
     /// Returns a string describing the last error caused by an LLVMRust* call.
-    pub fn LLVMRustGetLastError() -> *const c_char;
+    pub(crate) fn LLVMRustGetLastError() -> *const c_char;
 
     /// Prints the timing information collected by `-Ztime-llvm-passes`.
     pub(crate) fn LLVMRustPrintPassTimings(OutStr: &RustString);
@@ -1955,7 +1995,7 @@ unsafe extern "C" {
     pub(crate) fn LLVMRustPrintStatistics(OutStr: &RustString);
 
     /// Prepares inline assembly.
-    pub fn LLVMRustInlineAsm(
+    pub(crate) fn LLVMRustInlineAsm(
         Ty: &Type,
         AsmString: *const c_char,
         AsmStringLen: size_t,
@@ -1966,7 +2006,7 @@ unsafe extern "C" {
         Dialect: AsmDialect,
         CanThrow: Bool,
     ) -> &Value;
-    pub fn LLVMRustInlineAsmVerify(
+    pub(crate) fn LLVMRustInlineAsmVerify(
         Ty: &Type,
         Constraints: *const c_char,
         ConstraintsLen: size_t,
@@ -1987,6 +2027,8 @@ unsafe extern "C" {
         NumExpressions: size_t,
         CodeRegions: *const crate::coverageinfo::ffi::CodeRegion,
         NumCodeRegions: size_t,
+        ExpansionRegions: *const crate::coverageinfo::ffi::ExpansionRegion,
+        NumExpansionRegions: size_t,
         BranchRegions: *const crate::coverageinfo::ffi::BranchRegion,
         NumBranchRegions: size_t,
         MCDCBranchRegions: *const crate::coverageinfo::ffi::MCDCBranchRegion,
@@ -2010,16 +2052,16 @@ unsafe extern "C" {
     pub(crate) fn LLVMRustCoverageWriteCovmapVarNameToString(OutStr: &RustString);
 
     pub(crate) fn LLVMRustCoverageMappingVersion() -> u32;
-    pub fn LLVMRustDebugMetadataVersion() -> u32;
-    pub fn LLVMRustVersionMajor() -> u32;
-    pub fn LLVMRustVersionMinor() -> u32;
-    pub fn LLVMRustVersionPatch() -> u32;
+    pub(crate) fn LLVMRustDebugMetadataVersion() -> u32;
+    pub(crate) fn LLVMRustVersionMajor() -> u32;
+    pub(crate) fn LLVMRustVersionMinor() -> u32;
+    pub(crate) fn LLVMRustVersionPatch() -> u32;
 
     /// Add LLVM module flags.
     ///
     /// In order for Rust-C LTO to work, module flags must be compatible with Clang. What
     /// "compatible" means depends on the merge behaviors involved.
-    pub fn LLVMRustAddModuleFlagU32(
+    pub(crate) fn LLVMRustAddModuleFlagU32(
         M: &Module,
         MergeBehavior: ModuleFlagMergeBehavior,
         Name: *const c_char,
@@ -2027,7 +2069,7 @@ unsafe extern "C" {
         Value: u32,
     );
 
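Note: a minimal sketch of adding a Clang-compatible u32 module flag via `LLVMRustAddModuleFlagU32`, assuming `module` is a valid `&Module`, that the declaration also takes a `NameLen: size_t` argument between `Name` and `Value` (that line falls outside the hunks shown here), and that `ModuleFlagMergeBehavior::Warning` is one of the merge-behavior variants defined earlier in this file:

    // Sketch only: `module`, the `NameLen` parameter position and the
    // `Warning` variant name are assumptions.
    let name = c"Dwarf Version";
    unsafe {
        LLVMRustAddModuleFlagU32(
            module,
            ModuleFlagMergeBehavior::Warning,
            name.as_ptr(),
            name.to_bytes().len() as size_t,
            4, // flag value
        );
    }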
-    pub fn LLVMRustAddModuleFlagString(
+    pub(crate) fn LLVMRustAddModuleFlagString(
         M: &Module,
         MergeBehavior: ModuleFlagMergeBehavior,
         Name: *const c_char,
@@ -2036,7 +2078,7 @@ unsafe extern "C" {
         ValueLen: size_t,
     );
 
-    pub fn LLVMRustDIBuilderCreateCompileUnit<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateCompileUnit<'a>(
         Builder: &DIBuilder<'a>,
         Lang: c_uint,
         File: &'a DIFile,
@@ -2053,7 +2095,7 @@ unsafe extern "C" {
         DebugNameTableKind: DebugNameTableKind,
     ) -> &'a DIDescriptor;
 
-    pub fn LLVMRustDIBuilderCreateFile<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateFile<'a>(
         Builder: &DIBuilder<'a>,
         Filename: *const c_char,
         FilenameLen: size_t,
@@ -2066,12 +2108,12 @@ unsafe extern "C" {
         SourceLen: size_t,
     ) -> &'a DIFile;
 
-    pub fn LLVMRustDIBuilderCreateSubroutineType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateSubroutineType<'a>(
         Builder: &DIBuilder<'a>,
         ParameterTypes: &'a DIArray,
     ) -> &'a DICompositeType;
 
-    pub fn LLVMRustDIBuilderCreateFunction<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateFunction<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIDescriptor,
         Name: *const c_char,
@@ -2089,7 +2131,7 @@ unsafe extern "C" {
         Decl: Option<&'a DIDescriptor>,
     ) -> &'a DISubprogram;
 
-    pub fn LLVMRustDIBuilderCreateMethod<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateMethod<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIDescriptor,
         Name: *const c_char,
@@ -2104,7 +2146,7 @@ unsafe extern "C" {
         TParam: &'a DIArray,
     ) -> &'a DISubprogram;
 
-    pub fn LLVMRustDIBuilderCreateBasicType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateBasicType<'a>(
         Builder: &DIBuilder<'a>,
         Name: *const c_char,
         NameLen: size_t,
@@ -2112,7 +2154,7 @@ unsafe extern "C" {
         Encoding: c_uint,
     ) -> &'a DIBasicType;
 
-    pub fn LLVMRustDIBuilderCreateTypedef<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateTypedef<'a>(
         Builder: &DIBuilder<'a>,
         Type: &'a DIBasicType,
         Name: *const c_char,
@@ -2122,7 +2164,7 @@ unsafe extern "C" {
         Scope: Option<&'a DIScope>,
     ) -> &'a DIDerivedType;
 
-    pub fn LLVMRustDIBuilderCreatePointerType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreatePointerType<'a>(
         Builder: &DIBuilder<'a>,
         PointeeTy: &'a DIType,
         SizeInBits: u64,
@@ -2132,7 +2174,7 @@ unsafe extern "C" {
         NameLen: size_t,
     ) -> &'a DIDerivedType;
 
-    pub fn LLVMRustDIBuilderCreateStructType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateStructType<'a>(
         Builder: &DIBuilder<'a>,
         Scope: Option<&'a DIDescriptor>,
         Name: *const c_char,
@@ -2150,7 +2192,7 @@ unsafe extern "C" {
         UniqueIdLen: size_t,
     ) -> &'a DICompositeType;
 
-    pub fn LLVMRustDIBuilderCreateMemberType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateMemberType<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIDescriptor,
         Name: *const c_char,
@@ -2164,7 +2206,7 @@ unsafe extern "C" {
         Ty: &'a DIType,
     ) -> &'a DIDerivedType;
 
-    pub fn LLVMRustDIBuilderCreateVariantMemberType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateVariantMemberType<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIScope,
         Name: *const c_char,
@@ -2179,7 +2221,7 @@ unsafe extern "C" {
         Ty: &'a DIType,
     ) -> &'a DIType;
 
-    pub fn LLVMRustDIBuilderCreateStaticMemberType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateStaticMemberType<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIDescriptor,
         Name: *const c_char,
@@ -2192,13 +2234,13 @@ unsafe extern "C" {
         AlignInBits: u32,
     ) -> &'a DIDerivedType;
 
-    pub fn LLVMRustDIBuilderCreateQualifiedType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateQualifiedType<'a>(
         Builder: &DIBuilder<'a>,
         Tag: c_uint,
         Type: &'a DIType,
     ) -> &'a DIDerivedType;
 
-    pub fn LLVMRustDIBuilderCreateStaticVariable<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateStaticVariable<'a>(
         Builder: &DIBuilder<'a>,
         Context: Option<&'a DIScope>,
         Name: *const c_char,
@@ -2214,7 +2256,7 @@ unsafe extern "C" {
         AlignInBits: u32,
     ) -> &'a DIGlobalVariableExpression;
 
-    pub fn LLVMRustDIBuilderCreateVariable<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateVariable<'a>(
         Builder: &DIBuilder<'a>,
         Tag: c_uint,
         Scope: &'a DIDescriptor,
@@ -2229,7 +2271,7 @@ unsafe extern "C" {
         AlignInBits: u32,
     ) -> &'a DIVariable;
 
-    pub fn LLVMRustDIBuilderCreateArrayType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateArrayType<'a>(
         Builder: &DIBuilder<'a>,
         Size: u64,
         AlignInBits: u32,
@@ -2237,19 +2279,19 @@ unsafe extern "C" {
         Subscripts: &'a DIArray,
     ) -> &'a DIType;
 
-    pub fn LLVMRustDIBuilderGetOrCreateSubrange<'a>(
+    pub(crate) fn LLVMRustDIBuilderGetOrCreateSubrange<'a>(
         Builder: &DIBuilder<'a>,
         Lo: i64,
         Count: i64,
     ) -> &'a DISubrange;
 
-    pub fn LLVMRustDIBuilderGetOrCreateArray<'a>(
+    pub(crate) fn LLVMRustDIBuilderGetOrCreateArray<'a>(
         Builder: &DIBuilder<'a>,
         Ptr: *const Option<&'a DIDescriptor>,
         Count: c_uint,
     ) -> &'a DIArray;
 
-    pub fn LLVMRustDIBuilderInsertDeclareAtEnd<'a>(
+    pub(crate) fn LLVMRustDIBuilderInsertDeclareAtEnd<'a>(
         Builder: &DIBuilder<'a>,
         Val: &'a Value,
         VarInfo: &'a DIVariable,
@@ -2259,7 +2301,7 @@ unsafe extern "C" {
         InsertAtEnd: &'a BasicBlock,
     );
 
-    pub fn LLVMRustDIBuilderCreateEnumerator<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateEnumerator<'a>(
         Builder: &DIBuilder<'a>,
         Name: *const c_char,
         NameLen: size_t,
@@ -2268,7 +2310,7 @@ unsafe extern "C" {
         IsUnsigned: bool,
     ) -> &'a DIEnumerator;
 
-    pub fn LLVMRustDIBuilderCreateEnumerationType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateEnumerationType<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIScope,
         Name: *const c_char,
@@ -2282,7 +2324,7 @@ unsafe extern "C" {
         IsScoped: bool,
     ) -> &'a DIType;
 
-    pub fn LLVMRustDIBuilderCreateUnionType<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateUnionType<'a>(
         Builder: &DIBuilder<'a>,
         Scope: Option<&'a DIScope>,
         Name: *const c_char,
@@ -2298,7 +2340,7 @@ unsafe extern "C" {
         UniqueIdLen: size_t,
     ) -> &'a DIType;
 
-    pub fn LLVMRustDIBuilderCreateVariantPart<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateVariantPart<'a>(
         Builder: &DIBuilder<'a>,
         Scope: &'a DIScope,
         Name: *const c_char,
@@ -2314,7 +2356,7 @@ unsafe extern "C" {
         UniqueIdLen: size_t,
     ) -> &'a DIDerivedType;
 
-    pub fn LLVMRustDIBuilderCreateTemplateTypeParameter<'a>(
+    pub(crate) fn LLVMRustDIBuilderCreateTemplateTypeParameter<'a>(
         Builder: &DIBuilder<'a>,
         Scope: Option<&'a DIScope>,
         Name: *const c_char,
@@ -2322,37 +2364,37 @@ unsafe extern "C" {
         Ty: &'a DIType,
     ) -> &'a DITemplateTypeParameter;
 
-    pub fn LLVMRustDICompositeTypeReplaceArrays<'a>(
+    pub(crate) fn LLVMRustDICompositeTypeReplaceArrays<'a>(
         Builder: &DIBuilder<'a>,
         CompositeType: &'a DIType,
         Elements: Option<&'a DIArray>,
         Params: Option<&'a DIArray>,
     );
 
-    pub fn LLVMRustDILocationCloneWithBaseDiscriminator<'a>(
+    pub(crate) fn LLVMRustDILocationCloneWithBaseDiscriminator<'a>(
         Location: &'a DILocation,
         BD: c_uint,
     ) -> Option<&'a DILocation>;
 
-    pub fn LLVMRustWriteTypeToString(Type: &Type, s: &RustString);
-    pub fn LLVMRustWriteValueToString(value_ref: &Value, s: &RustString);
+    pub(crate) fn LLVMRustWriteTypeToString(Type: &Type, s: &RustString);
+    pub(crate) fn LLVMRustWriteValueToString(value_ref: &Value, s: &RustString);
 
-    pub fn LLVMRustHasFeature(T: &TargetMachine, s: *const c_char) -> bool;
+    pub(crate) fn LLVMRustHasFeature(T: &TargetMachine, s: *const c_char) -> bool;
 
     pub(crate) fn LLVMRustPrintTargetCPUs(TM: &TargetMachine, OutStr: &RustString);
-    pub fn LLVMRustGetTargetFeaturesCount(T: &TargetMachine) -> size_t;
-    pub fn LLVMRustGetTargetFeature(
+    pub(crate) fn LLVMRustGetTargetFeaturesCount(T: &TargetMachine) -> size_t;
+    pub(crate) fn LLVMRustGetTargetFeature(
         T: &TargetMachine,
         Index: size_t,
         Feature: &mut *const c_char,
         Desc: &mut *const c_char,
     );
 
-    pub fn LLVMRustGetHostCPUName(LenOut: &mut size_t) -> *const u8;
+    pub(crate) fn LLVMRustGetHostCPUName(LenOut: &mut size_t) -> *const u8;
 
     // This function makes copies of pointed to data, so the data's lifetime may end after this
     // function returns.
-    pub fn LLVMRustCreateTargetMachine(
+    pub(crate) fn LLVMRustCreateTargetMachine(
         Triple: *const c_char,
         CPU: *const c_char,
         Features: *const c_char,
@@ -2378,22 +2420,22 @@ unsafe extern "C" {
         ArgsCstrBuffLen: usize,
     ) -> *mut TargetMachine;
 
-    pub fn LLVMRustDisposeTargetMachine(T: *mut TargetMachine);
-    pub fn LLVMRustAddLibraryInfo<'a>(
+    pub(crate) fn LLVMRustDisposeTargetMachine(T: *mut TargetMachine);
+    pub(crate) fn LLVMRustAddLibraryInfo<'a>(
         PM: &PassManager<'a>,
         M: &'a Module,
         DisableSimplifyLibCalls: bool,
     );
-    pub fn LLVMRustWriteOutputFile<'a>(
+    pub(crate) fn LLVMRustWriteOutputFile<'a>(
         T: &'a TargetMachine,
-        PM: &PassManager<'a>,
+        PM: *mut PassManager<'a>,
         M: &'a Module,
         Output: *const c_char,
         DwoOutput: *const c_char,
         FileType: FileType,
         VerifyIR: bool,
     ) -> LLVMRustResult;
-    pub fn LLVMRustOptimize<'a>(
+    pub(crate) fn LLVMRustOptimize<'a>(
         M: &'a Module,
         TM: &'a TargetMachine,
         OptLevel: PassBuilderOptLevel,
@@ -2402,13 +2444,16 @@ unsafe extern "C" {
         NoPrepopulatePasses: bool,
         VerifyIR: bool,
         LintIR: bool,
-        UseThinLTOBuffers: bool,
+        ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
+        EmitThinLTO: bool,
+        EmitThinLTOSummary: bool,
         MergeFunctions: bool,
         UnrollLoops: bool,
         SLPVectorize: bool,
         LoopVectorize: bool,
         DisableSimplifyLibCalls: bool,
         EmitLifetimeMarkers: bool,
+        RunEnzyme: bool,
         SanitizerOptions: Option<&SanitizerOptions>,
         PGOGenPath: *const c_char,
         PGOUsePath: *const c_char,
@@ -2424,29 +2469,32 @@ unsafe extern "C" {
         LLVMPlugins: *const c_char,
         LLVMPluginsLen: size_t,
     ) -> LLVMRustResult;
-    pub fn LLVMRustPrintModule(
+    pub(crate) fn LLVMRustPrintModule(
         M: &Module,
         Output: *const c_char,
         Demangle: extern "C" fn(*const c_char, size_t, *mut c_char, size_t) -> size_t,
     ) -> LLVMRustResult;
-    pub fn LLVMRustSetLLVMOptions(Argc: c_int, Argv: *const *const c_char);
-    pub fn LLVMRustPrintPasses();
-    pub fn LLVMRustSetNormalizedTarget(M: &Module, triple: *const c_char);
-    pub fn LLVMRustRunRestrictionPass(M: &Module, syms: *const *const c_char, len: size_t);
-
-    pub fn LLVMRustOpenArchive(path: *const c_char) -> Option<&'static mut Archive>;
-    pub fn LLVMRustArchiveIteratorNew(AR: &Archive) -> &mut ArchiveIterator<'_>;
-    pub fn LLVMRustArchiveIteratorNext<'a>(
+    pub(crate) fn LLVMRustSetLLVMOptions(Argc: c_int, Argv: *const *const c_char);
+    pub(crate) fn LLVMRustPrintPasses();
+    pub(crate) fn LLVMRustSetNormalizedTarget(M: &Module, triple: *const c_char);
+    pub(crate) fn LLVMRustRunRestrictionPass(M: &Module, syms: *const *const c_char, len: size_t);
+
+    pub(crate) fn LLVMRustOpenArchive(path: *const c_char) -> Option<&'static mut Archive>;
+    pub(crate) fn LLVMRustArchiveIteratorNew(AR: &Archive) -> &mut ArchiveIterator<'_>;
+    pub(crate) fn LLVMRustArchiveIteratorNext<'a>(
         AIR: &ArchiveIterator<'a>,
     ) -> Option<&'a mut ArchiveChild<'a>>;
-    pub fn LLVMRustArchiveChildName(ACR: &ArchiveChild<'_>, size: &mut size_t) -> *const c_char;
-    pub fn LLVMRustArchiveChildFree<'a>(ACR: &'a mut ArchiveChild<'a>);
-    pub fn LLVMRustArchiveIteratorFree<'a>(AIR: &'a mut ArchiveIterator<'a>);
-    pub fn LLVMRustDestroyArchive(AR: &'static mut Archive);
+    pub(crate) fn LLVMRustArchiveChildName(
+        ACR: &ArchiveChild<'_>,
+        size: &mut size_t,
+    ) -> *const c_char;
+    pub(crate) fn LLVMRustArchiveChildFree<'a>(ACR: &'a mut ArchiveChild<'a>);
+    pub(crate) fn LLVMRustArchiveIteratorFree<'a>(AIR: &'a mut ArchiveIterator<'a>);
+    pub(crate) fn LLVMRustDestroyArchive(AR: &'static mut Archive);
 
-    pub fn LLVMRustWriteTwineToString(T: &Twine, s: &RustString);
+    pub(crate) fn LLVMRustWriteTwineToString(T: &Twine, s: &RustString);
 
-    pub fn LLVMRustUnpackOptimizationDiagnostic<'a>(
+    pub(crate) fn LLVMRustUnpackOptimizationDiagnostic<'a>(
         DI: &'a DiagnosticInfo,
         pass_name_out: &RustString,
         function_out: &mut Option<&'a Value>,
@@ -2456,22 +2504,22 @@ unsafe extern "C" {
         message_out: &RustString,
     );
 
-    pub fn LLVMRustUnpackInlineAsmDiagnostic<'a>(
+    pub(crate) fn LLVMRustUnpackInlineAsmDiagnostic<'a>(
         DI: &'a DiagnosticInfo,
         level_out: &mut DiagnosticLevel,
         cookie_out: &mut u64,
         message_out: &mut Option<&'a Twine>,
     );
 
-    pub fn LLVMRustWriteDiagnosticInfoToString(DI: &DiagnosticInfo, s: &RustString);
-    pub fn LLVMRustGetDiagInfoKind(DI: &DiagnosticInfo) -> DiagnosticKind;
+    pub(crate) fn LLVMRustWriteDiagnosticInfoToString(DI: &DiagnosticInfo, s: &RustString);
+    pub(crate) fn LLVMRustGetDiagInfoKind(DI: &DiagnosticInfo) -> DiagnosticKind;
 
-    pub fn LLVMRustGetSMDiagnostic<'a>(
+    pub(crate) fn LLVMRustGetSMDiagnostic<'a>(
         DI: &'a DiagnosticInfo,
         cookie_out: &mut u64,
     ) -> &'a SMDiagnostic;
 
-    pub fn LLVMRustUnpackSMDiagnostic(
+    pub(crate) fn LLVMRustUnpackSMDiagnostic(
         d: &SMDiagnostic,
         message_out: &RustString,
         buffer_out: &RustString,
@@ -2481,7 +2529,7 @@ unsafe extern "C" {
         num_ranges: &mut usize,
     ) -> bool;
 
-    pub fn LLVMRustWriteArchive(
+    pub(crate) fn LLVMRustWriteArchive(
         Dst: *const c_char,
         NumMembers: size_t,
         Members: *const &RustArchiveMember<'_>,
@@ -2489,63 +2537,63 @@ unsafe extern "C" {
         Kind: ArchiveKind,
         isEC: bool,
     ) -> LLVMRustResult;
-    pub fn LLVMRustArchiveMemberNew<'a>(
+    pub(crate) fn LLVMRustArchiveMemberNew<'a>(
         Filename: *const c_char,
         Name: *const c_char,
         Child: Option<&ArchiveChild<'a>>,
     ) -> &'a mut RustArchiveMember<'a>;
-    pub fn LLVMRustArchiveMemberFree<'a>(Member: &'a mut RustArchiveMember<'a>);
+    pub(crate) fn LLVMRustArchiveMemberFree<'a>(Member: &'a mut RustArchiveMember<'a>);
 
-    pub fn LLVMRustSetDataLayoutFromTargetMachine<'a>(M: &'a Module, TM: &'a TargetMachine);
+    pub(crate) fn LLVMRustSetDataLayoutFromTargetMachine<'a>(M: &'a Module, TM: &'a TargetMachine);
 
-    pub fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
+    pub(crate) fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
 
-    pub fn LLVMRustSetModulePICLevel(M: &Module);
-    pub fn LLVMRustSetModulePIELevel(M: &Module);
-    pub fn LLVMRustSetModuleCodeModel(M: &Module, Model: CodeModel);
-    pub fn LLVMRustModuleBufferCreate(M: &Module) -> &'static mut ModuleBuffer;
-    pub fn LLVMRustModuleBufferPtr(p: &ModuleBuffer) -> *const u8;
-    pub fn LLVMRustModuleBufferLen(p: &ModuleBuffer) -> usize;
-    pub fn LLVMRustModuleBufferFree(p: &'static mut ModuleBuffer);
-    pub fn LLVMRustModuleCost(M: &Module) -> u64;
-    pub fn LLVMRustModuleInstructionStats(M: &Module, Str: &RustString);
+    pub(crate) fn LLVMRustSetModulePICLevel(M: &Module);
+    pub(crate) fn LLVMRustSetModulePIELevel(M: &Module);
+    pub(crate) fn LLVMRustSetModuleCodeModel(M: &Module, Model: CodeModel);
+    pub(crate) fn LLVMRustModuleBufferCreate(M: &Module) -> &'static mut ModuleBuffer;
+    pub(crate) fn LLVMRustModuleBufferPtr(p: &ModuleBuffer) -> *const u8;
+    pub(crate) fn LLVMRustModuleBufferLen(p: &ModuleBuffer) -> usize;
+    pub(crate) fn LLVMRustModuleBufferFree(p: &'static mut ModuleBuffer);
+    pub(crate) fn LLVMRustModuleCost(M: &Module) -> u64;
+    pub(crate) fn LLVMRustModuleInstructionStats(M: &Module, Str: &RustString);
 
-    pub fn LLVMRustThinLTOBufferCreate(
+    pub(crate) fn LLVMRustThinLTOBufferCreate(
         M: &Module,
         is_thin: bool,
         emit_summary: bool,
     ) -> &'static mut ThinLTOBuffer;
-    pub fn LLVMRustThinLTOBufferFree(M: &'static mut ThinLTOBuffer);
-    pub fn LLVMRustThinLTOBufferPtr(M: &ThinLTOBuffer) -> *const c_char;
-    pub fn LLVMRustThinLTOBufferLen(M: &ThinLTOBuffer) -> size_t;
-    pub fn LLVMRustThinLTOBufferThinLinkDataPtr(M: &ThinLTOBuffer) -> *const c_char;
-    pub fn LLVMRustThinLTOBufferThinLinkDataLen(M: &ThinLTOBuffer) -> size_t;
-    pub fn LLVMRustCreateThinLTOData(
+    pub(crate) fn LLVMRustThinLTOBufferFree(M: &'static mut ThinLTOBuffer);
+    pub(crate) fn LLVMRustThinLTOBufferPtr(M: &ThinLTOBuffer) -> *const c_char;
+    pub(crate) fn LLVMRustThinLTOBufferLen(M: &ThinLTOBuffer) -> size_t;
+    pub(crate) fn LLVMRustThinLTOBufferThinLinkDataPtr(M: &ThinLTOBuffer) -> *const c_char;
+    pub(crate) fn LLVMRustThinLTOBufferThinLinkDataLen(M: &ThinLTOBuffer) -> size_t;
+    pub(crate) fn LLVMRustCreateThinLTOData(
         Modules: *const ThinLTOModule,
         NumModules: size_t,
         PreservedSymbols: *const *const c_char,
         PreservedSymbolsLen: size_t,
     ) -> Option<&'static mut ThinLTOData>;
-    pub fn LLVMRustPrepareThinLTORename(
+    pub(crate) fn LLVMRustPrepareThinLTORename(
         Data: &ThinLTOData,
         Module: &Module,
         Target: &TargetMachine,
     );
-    pub fn LLVMRustPrepareThinLTOResolveWeak(Data: &ThinLTOData, Module: &Module) -> bool;
-    pub fn LLVMRustPrepareThinLTOInternalize(Data: &ThinLTOData, Module: &Module) -> bool;
-    pub fn LLVMRustPrepareThinLTOImport(
+    pub(crate) fn LLVMRustPrepareThinLTOResolveWeak(Data: &ThinLTOData, Module: &Module) -> bool;
+    pub(crate) fn LLVMRustPrepareThinLTOInternalize(Data: &ThinLTOData, Module: &Module) -> bool;
+    pub(crate) fn LLVMRustPrepareThinLTOImport(
         Data: &ThinLTOData,
         Module: &Module,
         Target: &TargetMachine,
     ) -> bool;
-    pub fn LLVMRustFreeThinLTOData(Data: &'static mut ThinLTOData);
-    pub fn LLVMRustParseBitcodeForLTO(
+    pub(crate) fn LLVMRustFreeThinLTOData(Data: &'static mut ThinLTOData);
+    pub(crate) fn LLVMRustParseBitcodeForLTO(
         Context: &Context,
         Data: *const u8,
         len: usize,
         Identifier: *const c_char,
     ) -> Option<&Module>;
-    pub fn LLVMRustGetSliceFromObjectDataByName(
+    pub(crate) fn LLVMRustGetSliceFromObjectDataByName(
         data: *const u8,
         len: usize,
         name: *const u8,
@@ -2553,25 +2601,27 @@ unsafe extern "C" {
         out_len: &mut usize,
     ) -> *const u8;
 
-    pub fn LLVMRustLinkerNew(M: &Module) -> &mut Linker<'_>;
-    pub fn LLVMRustLinkerAdd(
+    pub(crate) fn LLVMRustLinkerNew(M: &Module) -> &mut Linker<'_>;
+    pub(crate) fn LLVMRustLinkerAdd(
         linker: &Linker<'_>,
         bytecode: *const c_char,
         bytecode_len: usize,
     ) -> bool;
-    pub fn LLVMRustLinkerFree<'a>(linker: &'a mut Linker<'a>);
-    pub fn LLVMRustComputeLTOCacheKey(
+    pub(crate) fn LLVMRustLinkerFree<'a>(linker: &'a mut Linker<'a>);
+    pub(crate) fn LLVMRustComputeLTOCacheKey(
         key_out: &RustString,
         mod_id: *const c_char,
         data: &ThinLTOData,
     );
 
-    pub fn LLVMRustContextGetDiagnosticHandler(Context: &Context) -> Option<&DiagnosticHandler>;
-    pub fn LLVMRustContextSetDiagnosticHandler(
+    pub(crate) fn LLVMRustContextGetDiagnosticHandler(
+        Context: &Context,
+    ) -> Option<&DiagnosticHandler>;
+    pub(crate) fn LLVMRustContextSetDiagnosticHandler(
         context: &Context,
         diagnostic_handler: Option<&DiagnosticHandler>,
     );
-    pub fn LLVMRustContextConfigureDiagnosticHandler(
+    pub(crate) fn LLVMRustContextConfigureDiagnosticHandler(
         context: &Context,
         diagnostic_handler_callback: DiagnosticHandlerTy,
         diagnostic_handler_context: *mut c_void,
@@ -2582,17 +2632,15 @@ unsafe extern "C" {
         pgo_available: bool,
     );
 
-    pub fn LLVMRustGetMangledName(V: &Value, out: &RustString);
-
-    pub fn LLVMRustGetElementTypeArgIndex(CallSite: &Value) -> i32;
+    pub(crate) fn LLVMRustGetMangledName(V: &Value, out: &RustString);
 
-    pub fn LLVMRustIsBitcode(ptr: *const u8, len: usize) -> bool;
+    pub(crate) fn LLVMRustGetElementTypeArgIndex(CallSite: &Value) -> i32;
 
-    pub fn LLVMRustLLVMHasZlibCompressionForDebugSymbols() -> bool;
+    pub(crate) fn LLVMRustLLVMHasZlibCompressionForDebugSymbols() -> bool;
 
-    pub fn LLVMRustLLVMHasZstdCompressionForDebugSymbols() -> bool;
+    pub(crate) fn LLVMRustLLVMHasZstdCompressionForDebugSymbols() -> bool;
 
-    pub fn LLVMRustGetSymbols(
+    pub(crate) fn LLVMRustGetSymbols(
         buf_ptr: *const u8,
         buf_len: usize,
         state: *mut c_void,
@@ -2600,10 +2648,10 @@ unsafe extern "C" {
         error_callback: GetSymbolsErrorCallback,
     ) -> *mut c_void;
 
-    pub fn LLVMRustIs64BitSymbolicFile(buf_ptr: *const u8, buf_len: usize) -> bool;
+    pub(crate) fn LLVMRustIs64BitSymbolicFile(buf_ptr: *const u8, buf_len: usize) -> bool;
 
-    pub fn LLVMRustIsECObject(buf_ptr: *const u8, buf_len: usize) -> bool;
+    pub(crate) fn LLVMRustIsECObject(buf_ptr: *const u8, buf_len: usize) -> bool;
 
-    pub fn LLVMRustSetNoSanitizeAddress(Global: &Value);
-    pub fn LLVMRustSetNoSanitizeHWAddress(Global: &Value);
+    pub(crate) fn LLVMRustSetNoSanitizeAddress(Global: &Value);
+    pub(crate) fn LLVMRustSetNoSanitizeHWAddress(Global: &Value);
 }
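The most consequential signature change above is `LLVMRustOptimize` gaining a `ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>` out-parameter plus `EmitThinLTO`/`EmitThinLTOSummary` flags (and `RunEnzyme`) in place of the old `UseThinLTOBuffers: bool`. Below is a minimal sketch of how such an out-parameter is typically driven from Rust; `demo_optimize` and `Buffer` are invented stand-ins for illustration only, and the real caller on the write path is not shown in this diff.

    use std::ptr;

    /// Opaque stand-in for `ThinLTOBuffer` (illustration only).
    #[repr(C)]
    struct Buffer {
        _private: [u8; 0],
    }

    unsafe extern "C" {
        // Hypothetical, simplified signature mirroring the shape of the new argument;
        // it is not part of the real bindings above.
        fn demo_optimize(emit_thin_lto: bool, out: Option<&mut *mut Buffer>) -> i32;
    }

    fn optimize_and_take_buffer() -> Option<*mut Buffer> {
        let mut buf: *mut Buffer = ptr::null_mut();
        // Passing `Some(&mut buf)` lets the callee store the buffer it created;
        // a caller that wants no ThinLTO buffer would pass `None` instead.
        let ok = unsafe { demo_optimize(true, Some(&mut buf)) } == 0;
        if ok && !buf.is_null() { Some(buf) } else { None }
    }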
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index 2592a7df95c..6ca81c651ed 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -1,7 +1,6 @@
 #![allow(non_snake_case)]
 
 use std::ffi::{CStr, CString};
-use std::ops::Deref;
 use std::ptr;
 use std::str::FromStr;
 use std::string::FromUtf8Error;
@@ -10,25 +9,21 @@ use libc::c_uint;
 use rustc_abi::{Align, Size, WrappingRange};
 use rustc_llvm::RustString;
 
-pub use self::AtomicRmwBinOp::*;
-pub use self::CallConv::*;
-pub use self::CodeGenOptSize::*;
-pub use self::IntPredicate::*;
-pub use self::Linkage::*;
-pub use self::MetadataType::*;
-pub use self::RealPredicate::*;
-pub use self::ffi::*;
+pub(crate) use self::CallConv::*;
+pub(crate) use self::CodeGenOptSize::*;
+pub(crate) use self::MetadataType::*;
+pub(crate) use self::ffi::*;
 use crate::common::AsCCharPtr;
 
-pub mod archive_ro;
-pub mod diagnostic;
-pub mod enzyme_ffi;
+pub(crate) mod archive_ro;
+pub(crate) mod diagnostic;
+pub(crate) mod enzyme_ffi;
 mod ffi;
 
-pub use self::enzyme_ffi::*;
+pub(crate) use self::enzyme_ffi::*;
 
 impl LLVMRustResult {
-    pub fn into_result(self) -> Result<(), ()> {
+    pub(crate) fn into_result(self) -> Result<(), ()> {
         match self {
             LLVMRustResult::Success => Ok(()),
             LLVMRustResult::Failure => Err(()),
@@ -36,13 +31,17 @@ impl LLVMRustResult {
     }
 }
 
-pub fn AddFunctionAttributes<'ll>(llfn: &'ll Value, idx: AttributePlace, attrs: &[&'ll Attribute]) {
+pub(crate) fn AddFunctionAttributes<'ll>(
+    llfn: &'ll Value,
+    idx: AttributePlace,
+    attrs: &[&'ll Attribute],
+) {
     unsafe {
         LLVMRustAddFunctionAttributes(llfn, idx.as_uint(), attrs.as_ptr(), attrs.len());
     }
 }
 
-pub fn AddCallSiteAttributes<'ll>(
+pub(crate) fn AddCallSiteAttributes<'ll>(
     callsite: &'ll Value,
     idx: AttributePlace,
     attrs: &[&'ll Attribute],
@@ -52,7 +51,11 @@ pub fn AddCallSiteAttributes<'ll>(
     }
 }
 
-pub fn CreateAttrStringValue<'ll>(llcx: &'ll Context, attr: &str, value: &str) -> &'ll Attribute {
+pub(crate) fn CreateAttrStringValue<'ll>(
+    llcx: &'ll Context,
+    attr: &str,
+    value: &str,
+) -> &'ll Attribute {
     unsafe {
         LLVMCreateStringAttribute(
             llcx,
@@ -64,7 +67,7 @@ pub fn CreateAttrStringValue<'ll>(llcx: &'ll Context, attr: &str, value: &str) -
     }
 }
 
-pub fn CreateAttrString<'ll>(llcx: &'ll Context, attr: &str) -> &'ll Attribute {
+pub(crate) fn CreateAttrString<'ll>(llcx: &'ll Context, attr: &str) -> &'ll Attribute {
     unsafe {
         LLVMCreateStringAttribute(
             llcx,
@@ -76,39 +79,39 @@ pub fn CreateAttrString<'ll>(llcx: &'ll Context, attr: &str) -> &'ll Attribute {
     }
 }
 
-pub fn CreateAlignmentAttr(llcx: &Context, bytes: u64) -> &Attribute {
+pub(crate) fn CreateAlignmentAttr(llcx: &Context, bytes: u64) -> &Attribute {
     unsafe { LLVMRustCreateAlignmentAttr(llcx, bytes) }
 }
 
-pub fn CreateDereferenceableAttr(llcx: &Context, bytes: u64) -> &Attribute {
+pub(crate) fn CreateDereferenceableAttr(llcx: &Context, bytes: u64) -> &Attribute {
     unsafe { LLVMRustCreateDereferenceableAttr(llcx, bytes) }
 }
 
-pub fn CreateDereferenceableOrNullAttr(llcx: &Context, bytes: u64) -> &Attribute {
+pub(crate) fn CreateDereferenceableOrNullAttr(llcx: &Context, bytes: u64) -> &Attribute {
     unsafe { LLVMRustCreateDereferenceableOrNullAttr(llcx, bytes) }
 }
 
-pub fn CreateByValAttr<'ll>(llcx: &'ll Context, ty: &'ll Type) -> &'ll Attribute {
+pub(crate) fn CreateByValAttr<'ll>(llcx: &'ll Context, ty: &'ll Type) -> &'ll Attribute {
     unsafe { LLVMRustCreateByValAttr(llcx, ty) }
 }
 
-pub fn CreateStructRetAttr<'ll>(llcx: &'ll Context, ty: &'ll Type) -> &'ll Attribute {
+pub(crate) fn CreateStructRetAttr<'ll>(llcx: &'ll Context, ty: &'ll Type) -> &'ll Attribute {
     unsafe { LLVMRustCreateStructRetAttr(llcx, ty) }
 }
 
-pub fn CreateUWTableAttr(llcx: &Context, async_: bool) -> &Attribute {
+pub(crate) fn CreateUWTableAttr(llcx: &Context, async_: bool) -> &Attribute {
     unsafe { LLVMRustCreateUWTableAttr(llcx, async_) }
 }
 
-pub fn CreateAllocSizeAttr(llcx: &Context, size_arg: u32) -> &Attribute {
+pub(crate) fn CreateAllocSizeAttr(llcx: &Context, size_arg: u32) -> &Attribute {
     unsafe { LLVMRustCreateAllocSizeAttr(llcx, size_arg) }
 }
 
-pub fn CreateAllocKindAttr(llcx: &Context, kind_arg: AllocKindFlags) -> &Attribute {
+pub(crate) fn CreateAllocKindAttr(llcx: &Context, kind_arg: AllocKindFlags) -> &Attribute {
     unsafe { LLVMRustCreateAllocKindAttr(llcx, kind_arg.bits()) }
 }
 
-pub fn CreateRangeAttr(llcx: &Context, size: Size, range: WrappingRange) -> &Attribute {
+pub(crate) fn CreateRangeAttr(llcx: &Context, size: Size, range: WrappingRange) -> &Attribute {
     let lower = range.start;
     let upper = range.end.wrapping_add(1);
     let lower_words = [lower as u64, (lower >> 64) as u64];
@@ -124,14 +127,14 @@ pub fn CreateRangeAttr(llcx: &Context, size: Size, range: WrappingRange) -> &Att
 }
 
 #[derive(Copy, Clone)]
-pub enum AttributePlace {
+pub(crate) enum AttributePlace {
     ReturnValue,
     Argument(u32),
     Function,
 }
 
 impl AttributePlace {
-    pub fn as_uint(self) -> c_uint {
+    pub(crate) fn as_uint(self) -> c_uint {
         match self {
             AttributePlace::ReturnValue => 0,
             AttributePlace::Argument(i) => 1 + i,
@@ -142,7 +145,7 @@ impl AttributePlace {
 
 #[derive(Copy, Clone, PartialEq)]
 #[repr(C)]
-pub enum CodeGenOptSize {
+pub(crate) enum CodeGenOptSize {
     CodeGenOptSizeNone = 0,
     CodeGenOptSizeDefault = 1,
     CodeGenOptSizeAggressive = 2,
@@ -163,12 +166,12 @@ impl FromStr for ArchiveKind {
     }
 }
 
-pub fn SetInstructionCallConv(instr: &Value, cc: CallConv) {
+pub(crate) fn SetInstructionCallConv(instr: &Value, cc: CallConv) {
     unsafe {
         LLVMSetInstructionCallConv(instr, cc as c_uint);
     }
 }
-pub fn SetFunctionCallConv(fn_: &Value, cc: CallConv) {
+pub(crate) fn SetFunctionCallConv(fn_: &Value, cc: CallConv) {
     unsafe {
         LLVMSetFunctionCallConv(fn_, cc as c_uint);
     }
@@ -180,20 +183,20 @@ pub fn SetFunctionCallConv(fn_: &Value, cc: CallConv) {
 // value's name as the comdat value to make sure that it is in a 1-to-1 relationship to the
 // function.
 // For more details on COMDAT sections see e.g., https://www.airs.com/blog/archives/52
-pub fn SetUniqueComdat(llmod: &Module, val: &Value) {
+pub(crate) fn SetUniqueComdat(llmod: &Module, val: &Value) {
     let name_buf = get_value_name(val).to_vec();
     let name =
         CString::from_vec_with_nul(name_buf).or_else(|buf| CString::new(buf.into_bytes())).unwrap();
     set_comdat(llmod, val, &name);
 }
 
-pub fn SetUnnamedAddress(global: &Value, unnamed: UnnamedAddr) {
+pub(crate) fn SetUnnamedAddress(global: &Value, unnamed: UnnamedAddr) {
     unsafe {
         LLVMSetUnnamedAddress(global, unnamed);
     }
 }
 
-pub fn set_thread_local_mode(global: &Value, mode: ThreadLocalMode) {
+pub(crate) fn set_thread_local_mode(global: &Value, mode: ThreadLocalMode) {
     unsafe {
         LLVMSetThreadLocalMode(global, mode);
     }
@@ -201,61 +204,65 @@ pub fn set_thread_local_mode(global: &Value, mode: ThreadLocalMode) {
 
 impl AttributeKind {
     /// Create an LLVM Attribute with no associated value.
-    pub fn create_attr(self, llcx: &Context) -> &Attribute {
+    pub(crate) fn create_attr(self, llcx: &Context) -> &Attribute {
         unsafe { LLVMRustCreateAttrNoValue(llcx, self) }
     }
 }
 
 impl MemoryEffects {
     /// Create an LLVM Attribute with these memory effects.
-    pub fn create_attr(self, llcx: &Context) -> &Attribute {
+    pub(crate) fn create_attr(self, llcx: &Context) -> &Attribute {
         unsafe { LLVMRustCreateMemoryEffectsAttr(llcx, self) }
     }
 }
 
-pub fn set_section(llglobal: &Value, section_name: &CStr) {
+pub(crate) fn set_section(llglobal: &Value, section_name: &CStr) {
     unsafe {
         LLVMSetSection(llglobal, section_name.as_ptr());
     }
 }
 
-pub fn add_global<'a>(llmod: &'a Module, ty: &'a Type, name_cstr: &CStr) -> &'a Value {
+pub(crate) fn add_global<'a>(llmod: &'a Module, ty: &'a Type, name_cstr: &CStr) -> &'a Value {
     unsafe { LLVMAddGlobal(llmod, ty, name_cstr.as_ptr()) }
 }
 
-pub fn set_initializer(llglobal: &Value, constant_val: &Value) {
+pub(crate) fn set_initializer(llglobal: &Value, constant_val: &Value) {
     unsafe {
         LLVMSetInitializer(llglobal, constant_val);
     }
 }
 
-pub fn set_global_constant(llglobal: &Value, is_constant: bool) {
+pub(crate) fn set_global_constant(llglobal: &Value, is_constant: bool) {
     unsafe {
         LLVMSetGlobalConstant(llglobal, if is_constant { ffi::True } else { ffi::False });
     }
 }
 
-pub fn get_linkage(llglobal: &Value) -> Linkage {
+pub(crate) fn get_linkage(llglobal: &Value) -> Linkage {
     unsafe { LLVMGetLinkage(llglobal) }.to_rust()
 }
 
-pub fn set_linkage(llglobal: &Value, linkage: Linkage) {
+pub(crate) fn set_linkage(llglobal: &Value, linkage: Linkage) {
     unsafe {
         LLVMSetLinkage(llglobal, linkage);
     }
 }
 
-pub fn get_visibility(llglobal: &Value) -> Visibility {
+pub(crate) fn is_declaration(llglobal: &Value) -> bool {
+    unsafe { LLVMIsDeclaration(llglobal) == ffi::True }
+}
+
+pub(crate) fn get_visibility(llglobal: &Value) -> Visibility {
     unsafe { LLVMGetVisibility(llglobal) }.to_rust()
 }
 
-pub fn set_visibility(llglobal: &Value, visibility: Visibility) {
+pub(crate) fn set_visibility(llglobal: &Value, visibility: Visibility) {
     unsafe {
         LLVMSetVisibility(llglobal, visibility);
     }
 }
 
-pub fn set_alignment(llglobal: &Value, align: Align) {
+pub(crate) fn set_alignment(llglobal: &Value, align: Align) {
     unsafe {
         ffi::LLVMSetAlignment(llglobal, align.bytes() as c_uint);
     }
@@ -265,7 +272,7 @@ pub fn set_alignment(llglobal: &Value, align: Align) {
 ///
 /// Inserts the comdat into `llmod` if it does not exist.
 /// It is an error to call this if the target does not support comdat.
-pub fn set_comdat(llmod: &Module, llglobal: &Value, name: &CStr) {
+pub(crate) fn set_comdat(llmod: &Module, llglobal: &Value, name: &CStr) {
     unsafe {
         let comdat = LLVMGetOrInsertComdat(llmod, name.as_ptr());
         LLVMSetComdat(llglobal, comdat);
@@ -273,7 +280,7 @@ pub fn set_comdat(llmod: &Module, llglobal: &Value, name: &CStr) {
 }
 
 /// Safe wrapper around `LLVMGetParam`, because segfaults are no fun.
-pub fn get_param(llfn: &Value, index: c_uint) -> &Value {
+pub(crate) fn get_param(llfn: &Value, index: c_uint) -> &Value {
     unsafe {
         assert!(
             index < LLVMCountParams(llfn),
@@ -286,7 +293,7 @@ pub fn get_param(llfn: &Value, index: c_uint) -> &Value {
 }
 
 /// Safe wrapper for `LLVMGetValueName2` into a byte slice
-pub fn get_value_name(value: &Value) -> &[u8] {
+pub(crate) fn get_value_name(value: &Value) -> &[u8] {
     unsafe {
         let mut len = 0;
         let data = LLVMGetValueName2(value, &mut len);
@@ -295,28 +302,28 @@ pub fn get_value_name(value: &Value) -> &[u8] {
 }
 
 /// Safe wrapper for `LLVMSetValueName2` from a byte slice
-pub fn set_value_name(value: &Value, name: &[u8]) {
+pub(crate) fn set_value_name(value: &Value, name: &[u8]) {
     unsafe {
         let data = name.as_c_char_ptr();
         LLVMSetValueName2(value, data, name.len());
     }
 }
 
-pub fn build_string(f: impl FnOnce(&RustString)) -> Result<String, FromUtf8Error> {
+pub(crate) fn build_string(f: impl FnOnce(&RustString)) -> Result<String, FromUtf8Error> {
     String::from_utf8(RustString::build_byte_buffer(f))
 }
 
-pub fn build_byte_buffer(f: impl FnOnce(&RustString)) -> Vec<u8> {
+pub(crate) fn build_byte_buffer(f: impl FnOnce(&RustString)) -> Vec<u8> {
     RustString::build_byte_buffer(f)
 }
 
-pub fn twine_to_string(tr: &Twine) -> String {
+pub(crate) fn twine_to_string(tr: &Twine) -> String {
     unsafe {
         build_string(|s| LLVMRustWriteTwineToString(tr, s)).expect("got a non-UTF8 Twine from LLVM")
     }
 }
 
-pub fn last_error() -> Option<String> {
+pub(crate) fn last_error() -> Option<String> {
     unsafe {
         let cstr = LLVMRustGetLastError();
         if cstr.is_null() {
@@ -347,6 +354,16 @@ impl<'a> OperandBundleOwned<'a> {
         };
         OperandBundleOwned { raw: ptr::NonNull::new(raw).unwrap() }
     }
+
+    /// Returns inner `OperandBundle` type.
+    ///
+    /// This could be a `Deref` implementation, but `OperandBundle` contains an extern type and
+    /// `Deref::Target: ?Sized`.
+    pub(crate) fn raw(&self) -> &OperandBundle<'a> {
+        // SAFETY: The returned reference is opaque and can only be used for FFI.
+        // It is valid for as long as `&self` is.
+        unsafe { self.raw.as_ref() }
+    }
 }
 
 impl Drop for OperandBundleOwned<'_> {
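With the `Deref` impl removed just below, call sites must spell out the conversion to the borrowed FFI handle. A small sketch of what that looks like; the helper name is hypothetical and not taken from this diff:

    // Illustration only: where deref coercion previously produced an
    // `&OperandBundle<'_>` implicitly, `raw()` now does so explicitly before the
    // value is handed to an LLVM entry point.
    fn bundle_for_ffi<'a>(owned: &OperandBundleOwned<'a>) -> &OperandBundle<'a> {
        owned.raw()
    }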
@@ -357,16 +374,6 @@ impl Drop for OperandBundleOwned<'_> {
     }
 }
 
-impl<'a> Deref for OperandBundleOwned<'a> {
-    type Target = OperandBundle<'a>;
-
-    fn deref(&self) -> &Self::Target {
-        // SAFETY: The returned reference is opaque and can only used for FFI.
-        // It is valid for as long as `&self` is.
-        unsafe { self.raw.as_ref() }
-    }
-}
-
 pub(crate) fn add_module_flag_u32(
     module: &Module,
     merge_behavior: ModuleFlagMergeBehavior,
@@ -395,3 +402,15 @@ pub(crate) fn add_module_flag_str(
         );
     }
 }
+
+pub(crate) fn set_dllimport_storage_class<'ll>(v: &'ll Value) {
+    unsafe {
+        LLVMSetDLLStorageClass(v, DLLStorageClass::DllImport);
+    }
+}
+
+pub(crate) fn set_dso_local<'ll>(v: &'ll Value) {
+    unsafe {
+        LLVMRustSetDSOLocal(v, true);
+    }
+}
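The two new helpers wrap storage-class tweaks that callers previously wrote as inline `unsafe` FFI calls; the mono_item.rs hunk later in this diff shows exactly that cleanup. A minimal sketch, assuming `g` is some global `&llvm::Value` obtained elsewhere:

    // Illustration only.
    fn mark_local(g: &llvm::Value) {
        // Before this change a caller wrote: unsafe { llvm::LLVMRustSetDSOLocal(g, true) };
        llvm::set_dso_local(g);
    }

The companion `set_dllimport_storage_class` plays the same role for a direct `LLVMSetDLLStorageClass(v, DLLStorageClass::DllImport)` call.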
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 53611c746a7..4e85286ed55 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -271,15 +271,20 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("aarch64", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // Filter out features that are not supported by the current LLVM version
         ("aarch64", "fpmr") if get_version().0 != 18 => None,
+        ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")),
         // In LLVM 18, `unaligned-scalar-mem` was merged with `unaligned-vector-mem` into a single
         // feature called `fast-unaligned-access`. In LLVM 19, it was split back out.
-        ("riscv32" | "riscv64", "unaligned-scalar-mem") if get_version().0 == 18 => {
+        ("riscv32" | "riscv64", "unaligned-scalar-mem" | "unaligned-vector-mem")
+            if get_version().0 == 18 =>
+        {
             Some(LLVMFeature::new("fast-unaligned-access"))
         }
         // Filter out features that are not supported by the current LLVM version
         ("riscv32" | "riscv64", "zaamo") if get_version().0 < 19 => None,
         ("riscv32" | "riscv64", "zabha") if get_version().0 < 19 => None,
         ("riscv32" | "riscv64", "zalrsc") if get_version().0 < 19 => None,
+        ("riscv32" | "riscv64", "zama16b") if get_version().0 < 19 => None,
+        ("riscv32" | "riscv64", "zacas") if get_version().0 < 20 => None,
         // Enable the evex512 target feature if an avx512 target feature is enabled.
         ("x86", s) if s.starts_with("avx512") => {
             Some(LLVMFeature::with_dependency(s, TargetFeatureFoldStrength::EnableOnly("evex512")))
@@ -295,6 +300,13 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
         ("sparc", "v8plus") if get_version().0 == 19 => Some(LLVMFeature::new("v9")),
         ("sparc", "v8plus") if get_version().0 < 19 => None,
         ("powerpc", "power8-crypto") => Some(LLVMFeature::new("crypto")),
+        // These new `amx` variants and `movrs` were introduced in LLVM20
+        ("x86", "amx-avx512" | "amx-fp8" | "amx-movrs" | "amx-tf32" | "amx-transpose")
+            if get_version().0 < 20 =>
+        {
+            None
+        }
+        ("x86", "movrs") if get_version().0 < 20 => None,
         (_, s) => Some(LLVMFeature::new(s)),
     }
 }
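A few informal readings of the match arms above (illustrative only, not tests from this diff):

    // ("aarch64" | "arm", "fp16")        -> Some("fullfp16")
    // ("x86", "avx512f")                 -> Some("avx512f"), with "evex512" folded in as an
    //                                       enable-only dependency
    // ("riscv64", "zacas") on LLVM < 20  -> None (filtered out as unsupported)
    // any feature not matched explicitly -> passed through to LLVM unchanged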
@@ -303,44 +315,44 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
 /// Must express features in the way Rust understands them.
 ///
 /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
-pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-    let mut features: FxHashSet<Symbol> = Default::default();
-
+pub(crate) fn target_features_cfg(sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
     // Add base features for the target.
     // We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
     // The reason is that if LLVM considers a feature implied but we do not, we don't want that to
     // show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
-    // the target CPU, that is still expanded to target features (with all their implied features) by
-    // LLVM.
+    // the target CPU, that is still expanded to target features (with all their implied features)
+    // by LLVM.
     let target_machine = create_informational_target_machine(sess, true);
-    // Compute which of the known target features are enabled in the 'base' target machine.
-    // We only consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
-    features.extend(
-        sess.target
-            .rust_target_features()
-            .iter()
-            .filter(|(feature, _, _)| {
-                // skip checking special features, as LLVM may not understand them
-                if RUSTC_SPECIAL_FEATURES.contains(feature) {
-                    return true;
-                }
-                // check that all features in a given smallvec are enabled
-                if let Some(feat) = to_llvm_features(sess, feature) {
-                    for llvm_feature in feat {
-                        let cstr = SmallCStr::new(llvm_feature);
-                        if !unsafe { llvm::LLVMRustHasFeature(&target_machine, cstr.as_ptr()) } {
-                            return false;
-                        }
+    // Compute which of the known target features are enabled in the 'base' target machine. We only
+    // consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
+    let mut features: FxHashSet<Symbol> = sess
+        .target
+        .rust_target_features()
+        .iter()
+        .filter(|(feature, _, _)| {
+            // skip checking special features, as LLVM may not understand them
+            if RUSTC_SPECIAL_FEATURES.contains(feature) {
+                return true;
+            }
+            if let Some(feat) = to_llvm_features(sess, feature) {
+                for llvm_feature in feat {
+                    let cstr = SmallCStr::new(llvm_feature);
+                    // `LLVMRustHasFeature` is moderately expensive. On targets with many
+                    // features (e.g. x86) these calls take a non-trivial fraction of runtime
+                    // when compiling very small programs.
+                    if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } {
+                        return false;
                     }
-                    true
-                } else {
-                    false
                 }
-            })
-            .map(|(feature, _, _)| Symbol::intern(feature)),
-    );
+                true
+            } else {
+                false
+            }
+        })
+        .map(|(feature, _, _)| Symbol::intern(feature))
+        .collect();
 
-    // Add enabled features
+    // Add enabled and remove disabled features.
     for (enabled, feature) in
         sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
             Some('+') => Some((true, Symbol::intern(&s[1..]))),
@@ -356,7 +368,7 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
             #[allow(rustc::potential_query_instability)]
             features.extend(
                 sess.target
-                    .implied_target_features(std::iter::once(feature.as_str()))
+                    .implied_target_features(feature.as_str())
                     .iter()
                     .map(|s| Symbol::intern(s)),
             );
@@ -367,11 +379,7 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
             // `features.contains` below.
             #[allow(rustc::potential_query_instability)]
             features.retain(|f| {
-                if sess
-                    .target
-                    .implied_target_features(std::iter::once(f.as_str()))
-                    .contains(&feature.as_str())
-                {
+                if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) {
                     // If `f` implies `feature`, then `!feature` implies `!f`, so we have to
                     // remove `f`. (This is the standard logical contraposition principle.)
                     false
@@ -383,25 +391,31 @@ pub fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol>
         }
     }
 
-    // Filter enabled features based on feature gates
-    sess.target
-        .rust_target_features()
-        .iter()
-        .filter_map(|(feature, gate, _)| {
-            // The `allow_unstable` set is used by rustc internally to determined which target
-            // features are truly available, so we want to return even perma-unstable "forbidden"
-            // features.
-            if allow_unstable
-                || (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
-            {
-                Some(*feature)
-            } else {
-                None
-            }
-        })
-        .filter(|feature| features.contains(&Symbol::intern(feature)))
-        .map(|feature| Symbol::intern(feature))
-        .collect()
+    // Filter enabled features based on feature gates.
+    let f = |allow_unstable| {
+        sess.target
+            .rust_target_features()
+            .iter()
+            .filter_map(|(feature, gate, _)| {
+                // The `allow_unstable` set is used by rustc internally to determine which target
+                // features are truly available, so we want to return even perma-unstable
+                // "forbidden" features.
+                if allow_unstable
+                    || (gate.in_cfg()
+                        && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
+                {
+                    Some(Symbol::intern(feature))
+                } else {
+                    None
+                }
+            })
+            .filter(|feature| features.contains(&feature))
+            .collect()
+    };
+
+    let target_features = f(false);
+    let unstable_target_features = f(true);
+    (target_features, unstable_target_features)
 }
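A hypothetical caller sketch (names assumed, not taken from this diff): instead of selecting a mode through the old `allow_unstable` parameter, one call now yields both sets.

    // Illustration only; `sess` is an existing `&Session`.
    let (target_features, unstable_target_features) = llvm_util::target_features_cfg(sess);
    // `target_features` respects feature gates; `unstable_target_features` additionally
    // keeps perma-unstable / "forbidden" features for rustc-internal use.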
 
 pub(crate) fn print_version() {
@@ -450,8 +464,8 @@ pub(crate) fn print(req: &PrintRequest, out: &mut String, sess: &Session) {
     require_inited();
     let tm = create_informational_target_machine(sess, false);
     match req.kind {
-        PrintKind::TargetCPUs => print_target_cpus(sess, &tm, out),
-        PrintKind::TargetFeatures => print_target_features(sess, &tm, out),
+        PrintKind::TargetCPUs => print_target_cpus(sess, tm.raw(), out),
+        PrintKind::TargetFeatures => print_target_features(sess, tm.raw(), out),
         _ => bug!("rustc_codegen_llvm can't handle print request: {:?}", req),
     }
 }
@@ -678,7 +692,7 @@ pub(crate) fn global_llvm_features(
         for feature in sess.opts.cg.target_feature.split(',') {
             if let Some(feature) = feature.strip_prefix('+') {
                 all_rust_features.extend(
-                    UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
+                    UnordSet::from(sess.target.implied_target_features(feature))
                         .to_sorted_stable_ord()
                         .iter()
                         .map(|&&s| (true, s)),
diff --git a/compiler/rustc_codegen_llvm/src/mono_item.rs b/compiler/rustc_codegen_llvm/src/mono_item.rs
index 33789c6261f..fdf62a08065 100644
--- a/compiler/rustc_codegen_llvm/src/mono_item.rs
+++ b/compiler/rustc_codegen_llvm/src/mono_item.rs
@@ -38,11 +38,7 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
 
         llvm::set_linkage(g, base::linkage_to_llvm(linkage));
         llvm::set_visibility(g, base::visibility_to_llvm(visibility));
-        unsafe {
-            if self.should_assume_dso_local(g, false) {
-                llvm::LLVMRustSetDSOLocal(g, true);
-            }
-        }
+        self.assume_dso_local(g, false);
 
         self.instances.borrow_mut().insert(instance, g);
     }
@@ -71,10 +67,7 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
         // compiler-rt, then we want to implicitly compile everything with hidden
         // visibility as we're going to link this object all over the place but
         // don't want the symbols to get exported.
-        if linkage != Linkage::Internal
-            && linkage != Linkage::Private
-            && self.tcx.is_compiler_builtins(LOCAL_CRATE)
-        {
+        if linkage != Linkage::Internal && self.tcx.is_compiler_builtins(LOCAL_CRATE) {
             llvm::set_visibility(lldecl, llvm::Visibility::Hidden);
         } else {
             llvm::set_visibility(lldecl, base::visibility_to_llvm(visibility));
@@ -82,9 +75,7 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
 
         debug!("predefine_fn: instance = {:?}", instance);
 
-        if self.should_assume_dso_local(lldecl, false) {
-            unsafe { llvm::LLVMRustSetDSOLocal(lldecl, true) };
-        }
+        self.assume_dso_local(lldecl, false);
 
         self.instances.borrow_mut().insert(instance, lldecl);
     }
@@ -93,11 +84,16 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
 impl CodegenCx<'_, '_> {
     /// Whether a definition or declaration can be assumed to be local to a group of
     /// libraries that form a single DSO or executable.
-    pub(crate) fn should_assume_dso_local(
-        &self,
-        llval: &llvm::Value,
-        is_declaration: bool,
-    ) -> bool {
+    /// Marks the value as `dso_local` if so.
+    pub(crate) fn assume_dso_local(&self, llval: &llvm::Value, is_declaration: bool) -> bool {
+        let assume = self.should_assume_dso_local(llval, is_declaration);
+        if assume {
+            llvm::set_dso_local(llval);
+        }
+        assume
+    }
+
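Since the renamed helper both performs the check and applies the attribute while still returning the boolean, callers that branch on the answer keep working. A hedged sketch of such a caller (hypothetical, not taken from this diff):

    // Illustration only; `g` is some declaration `&llvm::Value`.
    if cx.assume_dso_local(g, /* is_declaration */ true) {
        // `g` has just been marked dso_local; the returned bool lets a caller keep
        // whatever extra handling it previously attached to this case.
    }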
+    fn should_assume_dso_local(&self, llval: &llvm::Value, is_declaration: bool) -> bool {
         let linkage = llvm::get_linkage(llval);
         let visibility = llvm::get_visibility(llval);
 
@@ -124,7 +120,7 @@ impl CodegenCx<'_, '_> {
         }
 
         // Match clang by only supporting COFF and ELF for now.
-        if self.tcx.sess.target.is_like_osx {
+        if self.tcx.sess.target.is_like_darwin {
             return false;
         }
 
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index c56ad886120..b89ce90d1a1 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -1,17 +1,18 @@
+use std::borrow::Borrow;
 use std::{fmt, ptr};
 
 use libc::{c_char, c_uint};
-use rustc_abi::{AddressSpace, Align, Integer, Size};
+use rustc_abi::{AddressSpace, Align, Integer, Reg, Size};
 use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_middle::bug;
 use rustc_middle::ty::layout::TyAndLayout;
 use rustc_middle::ty::{self, Ty};
-use rustc_target::callconv::{CastTarget, FnAbi, Reg};
+use rustc_target::callconv::{CastTarget, FnAbi};
 
 use crate::abi::{FnAbiLlvmExt, LlvmType};
-use crate::context::{CodegenCx, SimpleCx};
+use crate::context::{CodegenCx, GenericCx, SCx};
 pub(crate) use crate::llvm::Type;
 use crate::llvm::{Bool, False, Metadata, True};
 use crate::type_of::LayoutLlvmExt;
@@ -36,29 +37,29 @@ impl fmt::Debug for Type {
 }
 
 impl<'ll> CodegenCx<'ll, '_> {}
-impl<'ll> SimpleCx<'ll> {
+impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
     pub(crate) fn type_named_struct(&self, name: &str) -> &'ll Type {
         let name = SmallCStr::new(name);
-        unsafe { llvm::LLVMStructCreateNamed(self.llcx, name.as_ptr()) }
+        unsafe { llvm::LLVMStructCreateNamed(self.llcx(), name.as_ptr()) }
     }
 
     pub(crate) fn set_struct_body(&self, ty: &'ll Type, els: &[&'ll Type], packed: bool) {
         unsafe { llvm::LLVMStructSetBody(ty, els.as_ptr(), els.len() as c_uint, packed as Bool) }
     }
     pub(crate) fn type_void(&self) -> &'ll Type {
-        unsafe { llvm::LLVMVoidTypeInContext(self.llcx) }
+        unsafe { llvm::LLVMVoidTypeInContext(self.llcx()) }
     }
     pub(crate) fn type_token(&self) -> &'ll Type {
-        unsafe { llvm::LLVMTokenTypeInContext(self.llcx) }
+        unsafe { llvm::LLVMTokenTypeInContext(self.llcx()) }
     }
 
     pub(crate) fn type_metadata(&self) -> &'ll Type {
-        unsafe { llvm::LLVMMetadataTypeInContext(self.llcx) }
+        unsafe { llvm::LLVMMetadataTypeInContext(self.llcx()) }
     }
 
     /// Creates an integer type with the given number of bits, e.g., i24
     pub(crate) fn type_ix(&self, num_bits: u64) -> &'ll Type {
-        unsafe { llvm::LLVMIntTypeInContext(self.llcx, num_bits as c_uint) }
+        unsafe { llvm::LLVMIntTypeInContext(self.llcx(), num_bits as c_uint) }
     }
 
     pub(crate) fn type_vector(&self, ty: &'ll Type, len: u64) -> &'ll Type {
@@ -121,19 +122,28 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
         self.type_array(self.type_from_integer(unit), size / unit_size)
     }
 }
-impl<'ll> SimpleCx<'ll> {
+
+impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
+    pub(crate) fn llcx(&self) -> &'ll llvm::Context {
+        (**self).borrow().llcx
+    }
+
+    pub(crate) fn isize_ty(&self) -> &'ll Type {
+        (**self).borrow().isize_ty
+    }
+
     pub(crate) fn type_variadic_func(&self, args: &[&'ll Type], ret: &'ll Type) -> &'ll Type {
         unsafe { llvm::LLVMFunctionType(ret, args.as_ptr(), args.len() as c_uint, True) }
     }
 
     pub(crate) fn type_i1(&self) -> &'ll Type {
-        unsafe { llvm::LLVMInt1TypeInContext(self.llcx) }
+        unsafe { llvm::LLVMInt1TypeInContext(self.llcx()) }
     }
 
     pub(crate) fn type_struct(&self, els: &[&'ll Type], packed: bool) -> &'ll Type {
         unsafe {
             llvm::LLVMStructTypeInContext(
-                self.llcx,
+                self.llcx(),
                 els.as_ptr(),
                 els.len() as c_uint,
                 packed as Bool,
@@ -142,45 +152,45 @@ impl<'ll> SimpleCx<'ll> {
     }
 }
 
-impl<'ll, 'tcx> BaseTypeCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
+impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {
     fn type_i8(&self) -> &'ll Type {
-        unsafe { llvm::LLVMInt8TypeInContext(self.llcx) }
+        unsafe { llvm::LLVMInt8TypeInContext(self.llcx()) }
     }
 
     fn type_i16(&self) -> &'ll Type {
-        unsafe { llvm::LLVMInt16TypeInContext(self.llcx) }
+        unsafe { llvm::LLVMInt16TypeInContext(self.llcx()) }
     }
 
     fn type_i32(&self) -> &'ll Type {
-        unsafe { llvm::LLVMInt32TypeInContext(self.llcx) }
+        unsafe { llvm::LLVMInt32TypeInContext(self.llcx()) }
     }
 
     fn type_i64(&self) -> &'ll Type {
-        unsafe { llvm::LLVMInt64TypeInContext(self.llcx) }
+        unsafe { llvm::LLVMInt64TypeInContext(self.llcx()) }
     }
 
     fn type_i128(&self) -> &'ll Type {
-        unsafe { llvm::LLVMIntTypeInContext(self.llcx, 128) }
+        unsafe { llvm::LLVMIntTypeInContext(self.llcx(), 128) }
     }
 
     fn type_isize(&self) -> &'ll Type {
-        self.isize_ty
+        self.isize_ty()
     }
 
     fn type_f16(&self) -> &'ll Type {
-        unsafe { llvm::LLVMHalfTypeInContext(self.llcx) }
+        unsafe { llvm::LLVMHalfTypeInContext(self.llcx()) }
     }
 
     fn type_f32(&self) -> &'ll Type {
-        unsafe { llvm::LLVMFloatTypeInContext(self.llcx) }
+        unsafe { llvm::LLVMFloatTypeInContext(self.llcx()) }
     }
 
     fn type_f64(&self) -> &'ll Type {
-        unsafe { llvm::LLVMDoubleTypeInContext(self.llcx) }
+        unsafe { llvm::LLVMDoubleTypeInContext(self.llcx()) }
     }
 
     fn type_f128(&self) -> &'ll Type {
-        unsafe { llvm::LLVMFP128TypeInContext(self.llcx) }
+        unsafe { llvm::LLVMFP128TypeInContext(self.llcx()) }
     }
 
     fn type_func(&self, args: &[&'ll Type], ret: &'ll Type) -> &'ll Type {
@@ -196,7 +206,7 @@ impl<'ll, 'tcx> BaseTypeCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     }
 
     fn type_ptr_ext(&self, address_space: AddressSpace) -> &'ll Type {
-        unsafe { llvm::LLVMPointerTypeInContext(self.llcx, address_space.0) }
+        unsafe { llvm::LLVMPointerTypeInContext(self.llcx(), address_space.0) }
     }
 
     fn element_type(&self, ty: &'ll Type) -> &'ll Type {
@@ -237,11 +247,11 @@ impl<'ll, 'tcx> BaseTypeCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
 
 impl Type {
     /// Creates an integer type with the given number of bits, e.g., i24
-    pub fn ix_llcx(llcx: &llvm::Context, num_bits: u64) -> &Type {
+    pub(crate) fn ix_llcx(llcx: &llvm::Context, num_bits: u64) -> &Type {
         unsafe { llvm::LLVMIntTypeInContext(llcx, num_bits as c_uint) }
     }
 
-    pub fn ptr_llcx(llcx: &llvm::Context) -> &Type {
+    pub(crate) fn ptr_llcx(llcx: &llvm::Context) -> &Type {
         unsafe { llvm::LLVMPointerTypeInContext(llcx, AddressSpace::DATA.0) }
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs
index b0b6da869da..4e7096da502 100644
--- a/compiler/rustc_codegen_llvm/src/type_of.rs
+++ b/compiler/rustc_codegen_llvm/src/type_of.rs
@@ -19,11 +19,11 @@ fn uncached_llvm_type<'a, 'tcx>(
 ) -> &'a Type {
     match layout.backend_repr {
         BackendRepr::Scalar(_) => bug!("handled elsewhere"),
-        BackendRepr::Vector { element, count } => {
+        BackendRepr::SimdVector { element, count } => {
             let element = layout.scalar_llvm_type_at(cx, element);
             return cx.type_vector(element, count);
         }
-        BackendRepr::Uninhabited | BackendRepr::Memory { .. } | BackendRepr::ScalarPair(..) => {}
+        BackendRepr::Memory { .. } | BackendRepr::ScalarPair(..) => {}
     }
 
     let name = match layout.ty.kind() {
@@ -171,19 +171,16 @@ pub(crate) trait LayoutLlvmExt<'tcx> {
 impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
     fn is_llvm_immediate(&self) -> bool {
         match self.backend_repr {
-            BackendRepr::Scalar(_) | BackendRepr::Vector { .. } => true,
-            BackendRepr::ScalarPair(..) | BackendRepr::Uninhabited | BackendRepr::Memory { .. } => {
-                false
-            }
+            BackendRepr::Scalar(_) | BackendRepr::SimdVector { .. } => true,
+            BackendRepr::ScalarPair(..) | BackendRepr::Memory { .. } => false,
         }
     }
 
     fn is_llvm_scalar_pair(&self) -> bool {
         match self.backend_repr {
             BackendRepr::ScalarPair(..) => true,
-            BackendRepr::Uninhabited
-            | BackendRepr::Scalar(_)
-            | BackendRepr::Vector { .. }
+            BackendRepr::Scalar(_)
+            | BackendRepr::SimdVector { .. }
             | BackendRepr::Memory { .. } => false,
         }
     }
diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
index 8baa69cefe1..c216f0f4a09 100644
--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -399,7 +399,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
             emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), false)
         }
         // macOS / iOS AArch64
-        "aarch64" if target.is_like_osx => {
+        "aarch64" if target.is_like_darwin => {
             emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), true)
         }
         "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),