Diffstat (limited to 'compiler/rustc_codegen_llvm')
-rw-r--r--  compiler/rustc_codegen_llvm/messages.ftl               19
-rw-r--r--  compiler/rustc_codegen_llvm/src/abi.rs                  6
-rw-r--r--  compiler/rustc_codegen_llvm/src/allocator.rs           74
-rw-r--r--  compiler/rustc_codegen_llvm/src/asm.rs                 21
-rw-r--r--  compiler/rustc_codegen_llvm/src/attributes.rs          11
-rw-r--r--  compiler/rustc_codegen_llvm/src/back/lto.rs             6
-rw-r--r--  compiler/rustc_codegen_llvm/src/builder.rs            189
-rw-r--r--  compiler/rustc_codegen_llvm/src/builder/autodiff.rs     4
-rw-r--r--  compiler/rustc_codegen_llvm/src/common.rs               4
-rw-r--r--  compiler/rustc_codegen_llvm/src/context.rs            435
-rw-r--r--  compiler/rustc_codegen_llvm/src/errors.rs              43
-rw-r--r--  compiler/rustc_codegen_llvm/src/intrinsic.rs          602
-rw-r--r--  compiler/rustc_codegen_llvm/src/lib.rs                 19
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs     17
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/ffi.rs            12
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/mod.rs            23
-rw-r--r--  compiler/rustc_codegen_llvm/src/type_.rs               16
18 files changed, 541 insertions, 1248 deletions
diff --git a/compiler/rustc_codegen_llvm/messages.ftl b/compiler/rustc_codegen_llvm/messages.ftl
index bda121c67fb..3885f18271f 100644
--- a/compiler/rustc_codegen_llvm/messages.ftl
+++ b/compiler/rustc_codegen_llvm/messages.ftl
@@ -10,11 +10,6 @@ codegen_llvm_dynamic_linking_with_lto =
 
 codegen_llvm_fixed_x18_invalid_arch = the `-Zfixed-x18` flag is not supported on the `{$arch}` architecture
 
-codegen_llvm_forbidden_ctarget_feature =
-    target feature `{$feature}` cannot be {$enabled} with `-Ctarget-feature`: {$reason}
-    .note = this was previously accepted by the compiler but is being phased out; it will become a hard error in a future release!
-codegen_llvm_forbidden_ctarget_feature_issue = for more information, see issue #116344 <https://github.com/rust-lang/rust/issues/116344>
-
 codegen_llvm_from_llvm_diag = {$message}
 
 codegen_llvm_from_llvm_optimization_diag = {$filename}:{$line}:{$column} {$pass_name} ({$kind}): {$message}
@@ -64,22 +59,8 @@ codegen_llvm_symbol_already_defined =
 codegen_llvm_target_machine = could not create LLVM TargetMachine for triple: {$triple}
 codegen_llvm_target_machine_with_llvm_err = could not create LLVM TargetMachine for triple: {$triple}: {$llvm_err}
 
-codegen_llvm_unknown_ctarget_feature =
-    unknown and unstable feature specified for `-Ctarget-feature`: `{$feature}`
-    .note = it is still passed through to the codegen backend, but use of this feature might be unsound and the behavior of this feature can change in the future
-    .possible_feature = you might have meant: `{$rust_feature}`
-    .consider_filing_feature_request = consider filing a feature request
-
-codegen_llvm_unknown_ctarget_feature_prefix =
-    unknown feature specified for `-Ctarget-feature`: `{$feature}`
-    .note = features must begin with a `+` to enable or `-` to disable it
-
 codegen_llvm_unknown_debuginfo_compression = unknown debuginfo compression algorithm {$algorithm} - will fall back to uncompressed debuginfo
 
-codegen_llvm_unstable_ctarget_feature =
-    unstable feature specified for `-Ctarget-feature`: `{$feature}`
-    .note = this feature is not stably supported; its behavior can change in the future
-
 codegen_llvm_write_bytecode = failed to write bytecode to {$path}: {$err}
 
 codegen_llvm_write_ir = failed to write LLVM IR to {$path}
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 119cd634f98..4b07c8aef91 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -229,7 +229,7 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
                 let llscratch = bx.alloca(scratch_size, scratch_align);
                 bx.lifetime_start(llscratch, scratch_size);
                 // ...store the value...
-                bx.store(val, llscratch, scratch_align);
+                rustc_codegen_ssa::mir::store_cast(bx, cast, val, llscratch, scratch_align);
                 // ... and then memcpy it to the intended destination.
                 bx.memcpy(
                     dst.val.llval,
@@ -649,6 +649,10 @@ impl llvm::CallConv {
         match conv {
             CanonAbi::C | CanonAbi::Rust => llvm::CCallConv,
             CanonAbi::RustCold => llvm::PreserveMost,
+            // Functions with this calling convention can only be called from assembly, but it is
+            // possible to declare an `extern "custom"` block, so the backend still needs a calling
+            // convention for declaring foreign functions.
+            CanonAbi::Custom => llvm::CCallConv,
             CanonAbi::GpuKernel => {
                 if arch == "amdgpu" {
                     llvm::AmdgpuKernel
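A hypothetical sketch of what the new `CanonAbi::Custom` arm has to accommodate (the `abi_custom` feature-gate name and the declaration below are assumptions for illustration, not taken from this diff): a foreign function whose body lives in assembly and which Rust code never calls directly, yet whose declaration still needs some LLVM calling convention attached, with `CCallConv` serving as the placeholder.

    #![feature(abi_custom)] // assumed gate name, illustration only

    unsafe extern "custom" {
        // Only reachable from assembly; Rust code cannot call it directly,
        // but the backend still emits an LLVM declaration for it.
        fn asm_only_entry();
    }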
diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs
index 4a78e694979..9dca63cfc8d 100644
--- a/compiler/rustc_codegen_llvm/src/allocator.rs
+++ b/compiler/rustc_codegen_llvm/src/allocator.rs
@@ -57,7 +57,7 @@ pub(crate) unsafe fn codegen(
             let from_name = mangle_internal_symbol(tcx, &global_fn_name(method.name));
             let to_name = mangle_internal_symbol(tcx, &default_fn_name(method.name));
 
-            create_wrapper_function(tcx, &cx, &from_name, &to_name, &args, output, false);
+            create_wrapper_function(tcx, &cx, &from_name, Some(&to_name), &args, output, false);
         }
     }
 
@@ -66,7 +66,7 @@ pub(crate) unsafe fn codegen(
         tcx,
         &cx,
         &mangle_internal_symbol(tcx, "__rust_alloc_error_handler"),
-        &mangle_internal_symbol(tcx, alloc_error_handler_name(alloc_error_handler_kind)),
+        Some(&mangle_internal_symbol(tcx, alloc_error_handler_name(alloc_error_handler_kind))),
         &[usize, usize], // size, align
         None,
         true,
@@ -81,11 +81,16 @@ pub(crate) unsafe fn codegen(
         let llval = llvm::LLVMConstInt(i8, val as u64, False);
         llvm::set_initializer(ll_g, llval);
 
-        let name = mangle_internal_symbol(tcx, NO_ALLOC_SHIM_IS_UNSTABLE);
-        let ll_g = cx.declare_global(&name, i8);
-        llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
-        let llval = llvm::LLVMConstInt(i8, 0, False);
-        llvm::set_initializer(ll_g, llval);
+        // __rust_no_alloc_shim_is_unstable_v2
+        create_wrapper_function(
+            tcx,
+            &cx,
+            &mangle_internal_symbol(tcx, NO_ALLOC_SHIM_IS_UNSTABLE),
+            None,
+            &[],
+            None,
+            false,
+        );
     }
 
     if tcx.sess.opts.debuginfo != DebugInfo::None {
@@ -99,7 +104,7 @@ fn create_wrapper_function(
     tcx: TyCtxt<'_>,
     cx: &SimpleCx<'_>,
     from_name: &str,
-    to_name: &str,
+    to_name: Option<&str>,
     args: &[&Type],
     output: Option<&Type>,
     no_return: bool,
@@ -128,33 +133,38 @@ fn create_wrapper_function(
         attributes::apply_to_llfn(llfn, llvm::AttributePlace::Function, &[uwtable]);
     }
 
-    let callee = declare_simple_fn(
-        &cx,
-        to_name,
-        llvm::CallConv::CCallConv,
-        llvm::UnnamedAddr::Global,
-        llvm::Visibility::Hidden,
-        ty,
-    );
-    if let Some(no_return) = no_return {
-        // -> ! DIFlagNoReturn
-        attributes::apply_to_llfn(callee, llvm::AttributePlace::Function, &[no_return]);
-    }
-    llvm::set_visibility(callee, llvm::Visibility::Hidden);
-
     let llbb = unsafe { llvm::LLVMAppendBasicBlockInContext(cx.llcx, llfn, c"entry".as_ptr()) };
-
     let mut bx = SBuilder::build(&cx, llbb);
-    let args = args
-        .iter()
-        .enumerate()
-        .map(|(i, _)| llvm::get_param(llfn, i as c_uint))
-        .collect::<Vec<_>>();
-    let ret = bx.call(ty, callee, &args, None);
-    llvm::LLVMSetTailCall(ret, True);
-    if output.is_some() {
-        bx.ret(ret);
+
+    if let Some(to_name) = to_name {
+        let callee = declare_simple_fn(
+            &cx,
+            to_name,
+            llvm::CallConv::CCallConv,
+            llvm::UnnamedAddr::Global,
+            llvm::Visibility::Hidden,
+            ty,
+        );
+        if let Some(no_return) = no_return {
+            // -> ! DIFlagNoReturn
+            attributes::apply_to_llfn(callee, llvm::AttributePlace::Function, &[no_return]);
+        }
+        llvm::set_visibility(callee, llvm::Visibility::Hidden);
+
+        let args = args
+            .iter()
+            .enumerate()
+            .map(|(i, _)| llvm::get_param(llfn, i as c_uint))
+            .collect::<Vec<_>>();
+        let ret = bx.call(ty, callee, &args, None);
+        llvm::LLVMSetTailCall(ret, True);
+        if output.is_some() {
+            bx.ret(ret);
+        } else {
+            bx.ret_void()
+        }
     } else {
+        assert!(output.is_none());
         bx.ret_void()
     }
 }
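The `to_name: Option<&str>` parameter lets the same helper emit both a tail-calling forwarder (for the `__rust_alloc*` shims) and, when `None` is passed, a function whose entire body is `ret void`. That empty stub replaces the old `__rust_no_alloc_shim_is_unstable` i8 static. A hypothetical caller-side sketch, not part of this diff (the real declaration goes through `mangle_internal_symbol`, so the raw symbol name differs):

    unsafe extern "Rust" {
        fn __rust_no_alloc_shim_is_unstable_v2();
    }

    fn require_alloc_shims() {
        // The generated body is just `ret void`; the call exists only to pull
        // the symbol, and with it the allocator shims, into the link, where
        // the old scheme referenced the static instead.
        unsafe { __rust_no_alloc_shim_is_unstable_v2() };
    }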
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index 4185aef8b31..9ddadcf16aa 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -1021,6 +1021,15 @@ fn llvm_fixup_input<'ll, 'tcx>(
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
         }
+        (LoongArch(LoongArchInlineAsmRegClass::freg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F16) =>
+        {
+            // Smaller floats are always "NaN-boxed" inside larger floats on LoongArch.
+            let value = bx.bitcast(value, bx.type_i16());
+            let value = bx.zext(value, bx.type_i32());
+            let value = bx.or(value, bx.const_u32(0xFFFF_0000));
+            bx.bitcast(value, bx.type_f32())
+        }
         (Mips(MipsInlineAsmRegClass::reg), BackendRepr::Scalar(s)) => {
             match s.primitive() {
                 // MIPS only supports register-length arithmetics.
@@ -1178,6 +1187,13 @@ fn llvm_fixup_output<'ll, 'tcx>(
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
         }
+        (LoongArch(LoongArchInlineAsmRegClass::freg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F16) =>
+        {
+            let value = bx.bitcast(value, bx.type_i32());
+            let value = bx.trunc(value, bx.type_i16());
+            bx.bitcast(value, bx.type_f16())
+        }
         (Mips(MipsInlineAsmRegClass::reg), BackendRepr::Scalar(s)) => {
             match s.primitive() {
                 // MIPS only supports register-length arithmetics.
@@ -1318,6 +1334,11 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
         ) if element.primitive() == Primitive::Float(Float::F16) => {
             cx.type_vector(cx.type_i16(), count)
         }
+        (LoongArch(LoongArchInlineAsmRegClass::freg), BackendRepr::Scalar(s))
+            if s.primitive() == Primitive::Float(Float::F16) =>
+        {
+            cx.type_f32()
+        }
         (Mips(MipsInlineAsmRegClass::reg), BackendRepr::Scalar(s)) => {
             match s.primitive() {
                 // MIPS only supports register-length arithmetics.
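A minimal standalone sketch of the "NaN-boxing" the new LoongArch `freg` fixups perform in IR, written as ordinary Rust bit manipulation (assumes the `f16` is already given as its raw bits, e.g. 1.0f16 is 0x3C00):

    fn nan_box_f16_in_f32(f16_bits: u16) -> f32 {
        // Zero-extend to 32 bits, then force the upper 16 bits to all ones, so
        // the value travels through an f32 register slot as a NaN pattern whose
        // low half carries the real f16 payload (matches the zext + or
        // 0xFFFF_0000 + bitcast sequence in llvm_fixup_input).
        f32::from_bits(u32::from(f16_bits) | 0xFFFF_0000)
    }

    fn nan_unbox_f16_from_f32(boxed: f32) -> u16 {
        // The inverse applied to asm outputs: truncate back to the low 16 bits
        // (matches the bitcast + trunc sequence in llvm_fixup_output).
        boxed.to_bits() as u16
    }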
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index 443c2eace55..adb53e0b66c 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -5,6 +5,7 @@ use rustc_hir::def_id::DefId;
 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, PatchableFunctionEntry};
 use rustc_middle::ty::{self, TyCtxt};
 use rustc_session::config::{BranchProtection, FunctionReturn, OptLevel, PAuthKey, PacRet};
+use rustc_symbol_mangling::mangle_internal_symbol;
 use rustc_target::spec::{FramePointer, SanitizerSet, StackProbeType, StackProtector};
 use smallvec::SmallVec;
 
@@ -256,11 +257,11 @@ fn probestack_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
         StackProbeType::Inline => "inline-asm",
         // Flag our internal `__rust_probestack` function as the stack probe symbol.
         // This is defined in the `compiler-builtins` crate for each architecture.
-        StackProbeType::Call => "__rust_probestack",
+        StackProbeType::Call => &mangle_internal_symbol(cx.tcx, "__rust_probestack"),
         // Pick from the two above based on the LLVM version.
         StackProbeType::InlineOrCall { min_llvm_version_for_inline } => {
             if llvm_util::get_version() < min_llvm_version_for_inline {
-                "__rust_probestack"
+                &mangle_internal_symbol(cx.tcx, "__rust_probestack")
             } else {
                 "inline-asm"
             }
@@ -490,11 +491,7 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         let allocated_pointer = AttributeKind::AllocatedPointer.create_attr(cx.llcx);
         attributes::apply_to_llfn(llfn, AttributePlace::Argument(0), &[allocated_pointer]);
     }
-    // function alignment can be set globally with the `-Zmin-function-alignment=<n>` flag;
-    // the alignment from a `#[repr(align(<n>))]` is used if it specifies a higher alignment.
-    if let Some(align) =
-        Ord::max(cx.tcx.sess.opts.unstable_opts.min_function_alignment, codegen_fn_attrs.alignment)
-    {
+    if let Some(align) = codegen_fn_attrs.alignment {
         llvm::set_alignment(llfn, align);
     }
     if let Some(backchain) = backchain_attr(cx) {
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index ee46b49a094..9c62244f3c9 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -587,7 +587,7 @@ fn thin_lto(
 }
 
 fn enable_autodiff_settings(ad: &[config::AutoDiff]) {
-    for &val in ad {
+    for val in ad {
         // We intentionally don't use a wildcard, to not forget handling anything new.
         match val {
             config::AutoDiff::PrintPerf => {
@@ -599,6 +599,10 @@ fn enable_autodiff_settings(ad: &[config::AutoDiff]) {
             config::AutoDiff::PrintTA => {
                 llvm::set_print_type(true);
             }
+            config::AutoDiff::PrintTAFn(fun) => {
+                llvm::set_print_type(true); // Enable general type printing
+                llvm::set_print_type_fun(&fun); // Set specific function to analyze
+            }
             config::AutoDiff::Inline => {
                 llvm::set_inline(true);
             }
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index ec006b59192..d0aa7320b4b 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -14,7 +14,6 @@ use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_hir::def_id::DefId;
-use rustc_middle::bug;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{
     FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers,
@@ -484,73 +483,31 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn checked_binop(
         &mut self,
         oop: OverflowOp,
-        ty: Ty<'_>,
+        ty: Ty<'tcx>,
         lhs: Self::Value,
         rhs: Self::Value,
     ) -> (Self::Value, Self::Value) {
-        use rustc_middle::ty::IntTy::*;
-        use rustc_middle::ty::UintTy::*;
-        use rustc_middle::ty::{Int, Uint};
-
-        let new_kind = match ty.kind() {
-            Int(t @ Isize) => Int(t.normalize(self.tcx.sess.target.pointer_width)),
-            Uint(t @ Usize) => Uint(t.normalize(self.tcx.sess.target.pointer_width)),
-            t @ (Uint(_) | Int(_)) => *t,
-            _ => panic!("tried to get overflow intrinsic for op applied to non-int type"),
+        let (size, signed) = ty.int_size_and_signed(self.tcx);
+        let width = size.bits();
+
+        if oop == OverflowOp::Sub && !signed {
+            // Emit sub and icmp instead of llvm.usub.with.overflow. LLVM considers these
+            // to be the canonical form. It will attempt to reform llvm.usub.with.overflow
+            // in the backend if profitable.
+            let sub = self.sub(lhs, rhs);
+            let cmp = self.icmp(IntPredicate::IntULT, lhs, rhs);
+            return (sub, cmp);
+        }
+
+        let oop_str = match oop {
+            OverflowOp::Add => "add",
+            OverflowOp::Sub => "sub",
+            OverflowOp::Mul => "mul",
         };
 
-        let name = match oop {
-            OverflowOp::Add => match new_kind {
-                Int(I8) => "llvm.sadd.with.overflow.i8",
-                Int(I16) => "llvm.sadd.with.overflow.i16",
-                Int(I32) => "llvm.sadd.with.overflow.i32",
-                Int(I64) => "llvm.sadd.with.overflow.i64",
-                Int(I128) => "llvm.sadd.with.overflow.i128",
-
-                Uint(U8) => "llvm.uadd.with.overflow.i8",
-                Uint(U16) => "llvm.uadd.with.overflow.i16",
-                Uint(U32) => "llvm.uadd.with.overflow.i32",
-                Uint(U64) => "llvm.uadd.with.overflow.i64",
-                Uint(U128) => "llvm.uadd.with.overflow.i128",
-
-                _ => unreachable!(),
-            },
-            OverflowOp::Sub => match new_kind {
-                Int(I8) => "llvm.ssub.with.overflow.i8",
-                Int(I16) => "llvm.ssub.with.overflow.i16",
-                Int(I32) => "llvm.ssub.with.overflow.i32",
-                Int(I64) => "llvm.ssub.with.overflow.i64",
-                Int(I128) => "llvm.ssub.with.overflow.i128",
-
-                Uint(_) => {
-                    // Emit sub and icmp instead of llvm.usub.with.overflow. LLVM considers these
-                    // to be the canonical form. It will attempt to reform llvm.usub.with.overflow
-                    // in the backend if profitable.
-                    let sub = self.sub(lhs, rhs);
-                    let cmp = self.icmp(IntPredicate::IntULT, lhs, rhs);
-                    return (sub, cmp);
-                }
-
-                _ => unreachable!(),
-            },
-            OverflowOp::Mul => match new_kind {
-                Int(I8) => "llvm.smul.with.overflow.i8",
-                Int(I16) => "llvm.smul.with.overflow.i16",
-                Int(I32) => "llvm.smul.with.overflow.i32",
-                Int(I64) => "llvm.smul.with.overflow.i64",
-                Int(I128) => "llvm.smul.with.overflow.i128",
-
-                Uint(U8) => "llvm.umul.with.overflow.i8",
-                Uint(U16) => "llvm.umul.with.overflow.i16",
-                Uint(U32) => "llvm.umul.with.overflow.i32",
-                Uint(U64) => "llvm.umul.with.overflow.i64",
-                Uint(U128) => "llvm.umul.with.overflow.i128",
-
-                _ => unreachable!(),
-            },
-        };
+        let name = format!("llvm.{}{oop_str}.with.overflow", if signed { 's' } else { 'u' });
 
-        let res = self.call_intrinsic(name, &[lhs, rhs]);
+        let res = self.call_intrinsic(name, &[self.type_ix(width)], &[lhs, rhs]);
         (self.extract_value(res, 0), self.extract_value(res, 1))
     }
 
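A standalone sketch of the intrinsic-name construction the rewritten `checked_binop` performs (the local `OverflowOp` enum stands in for the one from rustc_codegen_ssa, and `signed` stands in for what `ty.int_size_and_signed` returns):

    #[derive(PartialEq)]
    enum OverflowOp { Add, Sub, Mul }

    fn overflow_intrinsic_base_name(oop: OverflowOp, signed: bool) -> Option<String> {
        if oop == OverflowOp::Sub && !signed {
            // Unsigned subtraction is lowered as `sub` + `icmp ult` instead,
            // since LLVM treats that as the canonical form.
            return None;
        }
        let oop_str = match oop {
            OverflowOp::Add => "add",
            OverflowOp::Sub => "sub",
            OverflowOp::Mul => "mul",
        };
        Some(format!("llvm.{}{oop_str}.with.overflow", if signed { 's' } else { 'u' }))
    }

    // e.g. overflow_intrinsic_base_name(OverflowOp::Mul, false) yields
    // Some("llvm.umul.with.overflow"); the i8/i16/.../i128 suffix is no longer
    // part of the name and is supplied through the type-parameter slot of
    // `call_intrinsic` instead.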
@@ -954,11 +911,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        self.fptoint_sat(false, val, dest_ty)
+        self.call_intrinsic("llvm.fptoui.sat", &[dest_ty, self.val_ty(val)], &[val])
     }
 
     fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        self.fptoint_sat(true, val, dest_ty)
+        self.call_intrinsic("llvm.fptosi.sat", &[dest_ty, self.val_ty(val)], &[val])
     }
 
     fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
@@ -981,15 +938,12 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             if self.cx.type_kind(src_ty) != TypeKind::Vector {
                 let float_width = self.cx.float_width(src_ty);
                 let int_width = self.cx.int_width(dest_ty);
-                let name = match (int_width, float_width) {
-                    (32, 32) => Some("llvm.wasm.trunc.unsigned.i32.f32"),
-                    (32, 64) => Some("llvm.wasm.trunc.unsigned.i32.f64"),
-                    (64, 32) => Some("llvm.wasm.trunc.unsigned.i64.f32"),
-                    (64, 64) => Some("llvm.wasm.trunc.unsigned.i64.f64"),
-                    _ => None,
-                };
-                if let Some(name) = name {
-                    return self.call_intrinsic(name, &[val]);
+                if matches!((int_width, float_width), (32 | 64, 32 | 64)) {
+                    return self.call_intrinsic(
+                        "llvm.wasm.trunc.unsigned",
+                        &[dest_ty, src_ty],
+                        &[val],
+                    );
                 }
             }
         }
@@ -1003,15 +957,12 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             if self.cx.type_kind(src_ty) != TypeKind::Vector {
                 let float_width = self.cx.float_width(src_ty);
                 let int_width = self.cx.int_width(dest_ty);
-                let name = match (int_width, float_width) {
-                    (32, 32) => Some("llvm.wasm.trunc.signed.i32.f32"),
-                    (32, 64) => Some("llvm.wasm.trunc.signed.i32.f64"),
-                    (64, 32) => Some("llvm.wasm.trunc.signed.i64.f32"),
-                    (64, 64) => Some("llvm.wasm.trunc.signed.i64.f64"),
-                    _ => None,
-                };
-                if let Some(name) = name {
-                    return self.call_intrinsic(name, &[val]);
+                if matches!((int_width, float_width), (32 | 64, 32 | 64)) {
+                    return self.call_intrinsic(
+                        "llvm.wasm.trunc.signed",
+                        &[dest_ty, src_ty],
+                        &[val],
+                    );
                 }
             }
         }
@@ -1084,22 +1035,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
             return None;
         }
 
-        let name = match (ty.is_signed(), ty.primitive_size(self.tcx).bits()) {
-            (true, 8) => "llvm.scmp.i8.i8",
-            (true, 16) => "llvm.scmp.i8.i16",
-            (true, 32) => "llvm.scmp.i8.i32",
-            (true, 64) => "llvm.scmp.i8.i64",
-            (true, 128) => "llvm.scmp.i8.i128",
-
-            (false, 8) => "llvm.ucmp.i8.i8",
-            (false, 16) => "llvm.ucmp.i8.i16",
-            (false, 32) => "llvm.ucmp.i8.i32",
-            (false, 64) => "llvm.ucmp.i8.i64",
-            (false, 128) => "llvm.ucmp.i8.i128",
+        let size = ty.primitive_size(self.tcx);
+        let name = if ty.is_signed() { "llvm.scmp" } else { "llvm.ucmp" };
 
-            _ => bug!("three-way compare unsupported for type {ty:?}"),
-        };
-        Some(self.call_intrinsic(name, &[lhs, rhs]))
+        Some(self.call_intrinsic(name, &[self.type_i8(), self.type_ix(size.bits())], &[lhs, rhs]))
     }
 
     /* Miscellaneous instructions */
@@ -1227,11 +1166,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         (self.extract_value(landing_pad, 0), self.extract_value(landing_pad, 1))
     }
 
-    fn filter_landing_pad(&mut self, pers_fn: &'ll Value) -> (&'ll Value, &'ll Value) {
+    fn filter_landing_pad(&mut self, pers_fn: &'ll Value) {
         let ty = self.type_struct(&[self.type_ptr(), self.type_i32()], false);
         let landing_pad = self.landing_pad(ty, pers_fn, 1);
         self.add_clause(landing_pad, self.const_array(self.type_ptr(), &[]));
-        (self.extract_value(landing_pad, 0), self.extract_value(landing_pad, 1))
     }
 
     fn resume(&mut self, exn0: &'ll Value, exn1: &'ll Value) {
@@ -1385,11 +1323,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
 
     fn lifetime_start(&mut self, ptr: &'ll Value, size: Size) {
-        self.call_lifetime_intrinsic("llvm.lifetime.start.p0i8", ptr, size);
+        self.call_lifetime_intrinsic("llvm.lifetime.start", ptr, size);
     }
 
     fn lifetime_end(&mut self, ptr: &'ll Value, size: Size) {
-        self.call_lifetime_intrinsic("llvm.lifetime.end.p0i8", ptr, size);
+        self.call_lifetime_intrinsic("llvm.lifetime.end", ptr, size);
     }
 
     fn call(
@@ -1454,7 +1392,8 @@ impl<'ll> StaticBuilderMethods for Builder<'_, 'll, '_> {
         // Forward to the `get_static` method of `CodegenCx`
         let global = self.cx().get_static(def_id);
         if self.cx().tcx.is_thread_local_static(def_id) {
-            let pointer = self.call_intrinsic("llvm.threadlocal.address", &[global]);
+            let pointer =
+                self.call_intrinsic("llvm.threadlocal.address", &[self.val_ty(global)], &[global]);
             // Cast to default address space if globals are in a different addrspace
             self.pointercast(pointer, self.type_ptr())
         } else {
@@ -1649,12 +1588,17 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
 }
 
 impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
-    pub(crate) fn call_intrinsic(&mut self, intrinsic: &str, args: &[&'ll Value]) -> &'ll Value {
-        let (ty, f) = self.cx.get_intrinsic(intrinsic);
+    pub(crate) fn call_intrinsic(
+        &mut self,
+        base_name: impl Into<Cow<'static, str>>,
+        type_params: &[&'ll Type],
+        args: &[&'ll Value],
+    ) -> &'ll Value {
+        let (ty, f) = self.cx.get_intrinsic(base_name.into(), type_params);
         self.call(ty, None, None, f, args, None, None)
     }
 
-    fn call_lifetime_intrinsic(&mut self, intrinsic: &str, ptr: &'ll Value, size: Size) {
+    fn call_lifetime_intrinsic(&mut self, intrinsic: &'static str, ptr: &'ll Value, size: Size) {
         let size = size.bytes();
         if size == 0 {
             return;
@@ -1664,7 +1608,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             return;
         }
 
-        self.call_intrinsic(intrinsic, &[self.cx.const_u64(size), ptr]);
+        self.call_intrinsic(intrinsic, &[self.val_ty(ptr)], &[self.cx.const_u64(size), ptr]);
     }
 }
 impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
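The call-site effect of the new `call_intrinsic` signature, paraphrasing the lifetime helper from the hunks above with the intrinsic name inlined:

    // before: the overload suffix was baked into the name, which had to be
    // pre-declared in context.rs
    self.call_intrinsic("llvm.lifetime.start.p0i8", &[self.cx.const_u64(size), ptr]);
    // after: base name plus an explicit type-parameter list; the concrete
    // overload is resolved lazily through LLVM's own intrinsic lookup (see the
    // context.rs diff below)
    self.call_intrinsic("llvm.lifetime.start", &[self.val_ty(ptr)], &[self.cx.const_u64(size), ptr]);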
@@ -1689,31 +1633,6 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
     }
 }
 impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
-    fn fptoint_sat(&mut self, signed: bool, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
-        let src_ty = self.cx.val_ty(val);
-        let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
-            assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
-            (
-                self.cx.element_type(src_ty),
-                self.cx.element_type(dest_ty),
-                Some(self.cx.vector_length(src_ty)),
-            )
-        } else {
-            (src_ty, dest_ty, None)
-        };
-        let float_width = self.cx.float_width(float_ty);
-        let int_width = self.cx.int_width(int_ty);
-
-        let instr = if signed { "fptosi" } else { "fptoui" };
-        let name = if let Some(vector_length) = vector_length {
-            format!("llvm.{instr}.sat.v{vector_length}i{int_width}.v{vector_length}f{float_width}")
-        } else {
-            format!("llvm.{instr}.sat.i{int_width}.f{float_width}")
-        };
-        let f = self.declare_cfn(&name, llvm::UnnamedAddr::No, self.type_func(&[src_ty], dest_ty));
-        self.call(self.type_func(&[src_ty], dest_ty), None, None, f, &[val], None, None)
-    }
-
     pub(crate) fn landing_pad(
         &mut self,
         ty: &'ll Type,
@@ -1819,7 +1738,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
             // llvm.type.test intrinsic. The LowerTypeTests link-time optimization pass replaces
             // calls to this intrinsic with code to test type membership.
             let typeid = self.get_metadata_value(typeid_metadata);
-            let cond = self.call_intrinsic("llvm.type.test", &[llfn, typeid]);
+            let cond = self.call_intrinsic("llvm.type.test", &[], &[llfn, typeid]);
             let bb_pass = self.append_sibling_block("type_test.pass");
             let bb_fail = self.append_sibling_block("type_test.fail");
             self.cond_br(cond, bb_pass, bb_fail);
@@ -1887,7 +1806,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         num_counters: &'ll Value,
         index: &'ll Value,
     ) {
-        self.call_intrinsic("llvm.instrprof.increment", &[fn_name, hash, num_counters, index]);
+        self.call_intrinsic("llvm.instrprof.increment", &[], &[fn_name, hash, num_counters, index]);
     }
 
     /// Emits a call to `llvm.instrprof.mcdc.parameters`.
@@ -1906,7 +1825,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         hash: &'ll Value,
         bitmap_bits: &'ll Value,
     ) {
-        self.call_intrinsic("llvm.instrprof.mcdc.parameters", &[fn_name, hash, bitmap_bits]);
+        self.call_intrinsic("llvm.instrprof.mcdc.parameters", &[], &[fn_name, hash, bitmap_bits]);
     }
 
     #[instrument(level = "debug", skip(self))]
@@ -1918,7 +1837,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
         mcdc_temp: &'ll Value,
     ) {
         let args = &[fn_name, hash, bitmap_index, mcdc_temp];
-        self.call_intrinsic("llvm.instrprof.mcdc.tvbitmap.update", args);
+        self.call_intrinsic("llvm.instrprof.mcdc.tvbitmap.update", &[], args);
     }
 
     #[instrument(level = "debug", skip(self))]
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index c5c13ac097a..b07d9a5cfca 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -114,7 +114,7 @@ fn match_args_from_caller_to_enzyme<'ll>(
             let mul = unsafe {
                 llvm::LLVMBuildMul(
                     builder.llbuilder,
-                    cx.get_const_i64(elem_bytes_size),
+                    cx.get_const_int(cx.type_i64(), elem_bytes_size),
                     next_outer_arg,
                     UNNAMED,
                 )
@@ -385,7 +385,7 @@ fn generate_enzyme_call<'ll>(
         if attrs.width > 1 {
             let enzyme_width = cx.create_metadata("enzyme_width".to_string()).unwrap();
             args.push(cx.get_metadata_value(enzyme_width));
-            args.push(cx.get_const_i64(attrs.width as u64));
+            args.push(cx.get_const_int(cx.type_i64(), attrs.width as u64));
         }
 
         let has_sret = has_sret(outer_fn);
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index 3cfa96393e9..ae5add59322 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -99,14 +99,14 @@ impl<'ll, CX: Borrow<SCx<'ll>>> BackendTypes for GenericCx<'ll, CX> {
     type DIVariable = &'ll llvm::debuginfo::DIVariable;
 }
 
-impl<'ll> CodegenCx<'ll, '_> {
+impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
     pub(crate) fn const_array(&self, ty: &'ll Type, elts: &[&'ll Value]) -> &'ll Value {
         let len = u64::try_from(elts.len()).expect("LLVMConstArray2 elements len overflow");
         unsafe { llvm::LLVMConstArray2(ty, elts.as_ptr(), len) }
     }
 
     pub(crate) fn const_bytes(&self, bytes: &[u8]) -> &'ll Value {
-        bytes_in_context(self.llcx, bytes)
+        bytes_in_context(self.llcx(), bytes)
     }
 
     pub(crate) fn const_get_elt(&self, v: &'ll Value, idx: u64) -> &'ll Value {
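A standalone sketch of the pattern this hunk applies: the constant helpers move from the concrete `CodegenCx` onto the generic context, so any wrapper that can borrow the simple context (for example the one used by the allocator shim code) gets them too. Type names below are illustrative stand-ins, not the real ones:

    use std::borrow::Borrow;

    struct SCx {
        llcx: u32,
    }

    struct GenericCx<CX: Borrow<SCx>>(CX);

    impl<CX: Borrow<SCx>> GenericCx<CX> {
        fn scx(&self) -> &SCx {
            self.0.borrow()
        }

        // Previously only available on the full context; now every CX that can
        // borrow an SCx gets it through this single generic impl.
        fn llcx(&self) -> u32 {
            self.scx().llcx
        }
    }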
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 8d6e1d8941b..0324dff6ff2 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1,4 +1,4 @@
-use std::borrow::Borrow;
+use std::borrow::{Borrow, Cow};
 use std::cell::{Cell, RefCell};
 use std::ffi::{CStr, c_char, c_uint};
 use std::marker::PhantomData;
@@ -137,7 +137,8 @@ pub(crate) struct FullCx<'ll, 'tcx> {
     eh_catch_typeinfo: Cell<Option<&'ll Value>>,
     pub rust_try_fn: Cell<Option<(&'ll Type, &'ll Value)>>,
 
-    intrinsics: RefCell<FxHashMap<&'static str, (&'ll Type, &'ll Value)>>,
+    intrinsics:
+        RefCell<FxHashMap<(Cow<'static, str>, SmallVec<[&'ll Type; 2]>), (&'ll Type, &'ll Value)>>,
 
     /// A counter that is used for generating local symbol names
     local_gen_sym_counter: Cell<usize>,
@@ -678,11 +679,8 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
         llvm::LLVMMetadataAsValue(self.llcx(), metadata)
     }
 
-    // FIXME(autodiff): We should split `ConstCodegenMethods` to pull the reusable parts
-    // onto a trait that is also implemented for GenericCx.
-    pub(crate) fn get_const_i64(&self, n: u64) -> &'ll Value {
-        let ty = unsafe { llvm::LLVMInt64TypeInContext(self.llcx()) };
-        unsafe { llvm::LLVMConstInt(ty, n, llvm::False) }
+    pub(crate) fn get_const_int(&self, ty: &'ll Type, val: u64) -> &'ll Value {
+        unsafe { llvm::LLVMConstInt(ty, val, llvm::False) }
     }
 
     pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> {
@@ -842,410 +840,41 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
 }
 
 impl<'ll> CodegenCx<'ll, '_> {
-    pub(crate) fn get_intrinsic(&self, key: &str) -> (&'ll Type, &'ll Value) {
-        if let Some(v) = self.intrinsics.borrow().get(key).cloned() {
-            return v;
-        }
-
-        self.declare_intrinsic(key).unwrap_or_else(|| bug!("unknown intrinsic '{}'", key))
-    }
-
-    fn insert_intrinsic(
+    pub(crate) fn get_intrinsic(
         &self,
-        name: &'static str,
-        args: Option<&[&'ll llvm::Type]>,
-        ret: &'ll llvm::Type,
-    ) -> (&'ll llvm::Type, &'ll llvm::Value) {
-        let fn_ty = if let Some(args) = args {
-            self.type_func(args, ret)
-        } else {
-            self.type_variadic_func(&[], ret)
-        };
-        let f = self.declare_cfn(name, llvm::UnnamedAddr::No, fn_ty);
-        self.intrinsics.borrow_mut().insert(name, (fn_ty, f));
-        (fn_ty, f)
-    }
-
-    fn declare_intrinsic(&self, key: &str) -> Option<(&'ll Type, &'ll Value)> {
-        macro_rules! ifn {
-            ($name:expr, fn() -> $ret:expr) => (
-                if key == $name {
-                    return Some(self.insert_intrinsic($name, Some(&[]), $ret));
-                }
-            );
-            ($name:expr, fn(...) -> $ret:expr) => (
-                if key == $name {
-                    return Some(self.insert_intrinsic($name, None, $ret));
-                }
-            );
-            ($name:expr, fn($($arg:expr),*) -> $ret:expr) => (
-                if key == $name {
-                    return Some(self.insert_intrinsic($name, Some(&[$($arg),*]), $ret));
-                }
-            );
-        }
-        macro_rules! mk_struct {
-            ($($field_ty:expr),*) => (self.type_struct( &[$($field_ty),*], false))
-        }
-
-        let ptr = self.type_ptr();
-        let void = self.type_void();
-        let i1 = self.type_i1();
-        let t_i8 = self.type_i8();
-        let t_i16 = self.type_i16();
-        let t_i32 = self.type_i32();
-        let t_i64 = self.type_i64();
-        let t_i128 = self.type_i128();
-        let t_isize = self.type_isize();
-        let t_f16 = self.type_f16();
-        let t_f32 = self.type_f32();
-        let t_f64 = self.type_f64();
-        let t_f128 = self.type_f128();
-        let t_metadata = self.type_metadata();
-        let t_token = self.type_token();
-
-        ifn!("llvm.wasm.get.exception", fn(t_token) -> ptr);
-        ifn!("llvm.wasm.get.ehselector", fn(t_token) -> t_i32);
-
-        ifn!("llvm.wasm.trunc.unsigned.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.wasm.trunc.unsigned.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.wasm.trunc.unsigned.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.wasm.trunc.unsigned.i64.f64", fn(t_f64) -> t_i64);
-        ifn!("llvm.wasm.trunc.signed.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.wasm.trunc.signed.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.wasm.trunc.signed.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.wasm.trunc.signed.i64.f64", fn(t_f64) -> t_i64);
-
-        ifn!("llvm.fptosi.sat.i8.f32", fn(t_f32) -> t_i8);
-        ifn!("llvm.fptosi.sat.i16.f32", fn(t_f32) -> t_i16);
-        ifn!("llvm.fptosi.sat.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.fptosi.sat.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.fptosi.sat.i128.f32", fn(t_f32) -> t_i128);
-        ifn!("llvm.fptosi.sat.i8.f64", fn(t_f64) -> t_i8);
-        ifn!("llvm.fptosi.sat.i16.f64", fn(t_f64) -> t_i16);
-        ifn!("llvm.fptosi.sat.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.fptosi.sat.i64.f64", fn(t_f64) -> t_i64);
-        ifn!("llvm.fptosi.sat.i128.f64", fn(t_f64) -> t_i128);
-
-        ifn!("llvm.fptoui.sat.i8.f32", fn(t_f32) -> t_i8);
-        ifn!("llvm.fptoui.sat.i16.f32", fn(t_f32) -> t_i16);
-        ifn!("llvm.fptoui.sat.i32.f32", fn(t_f32) -> t_i32);
-        ifn!("llvm.fptoui.sat.i64.f32", fn(t_f32) -> t_i64);
-        ifn!("llvm.fptoui.sat.i128.f32", fn(t_f32) -> t_i128);
-        ifn!("llvm.fptoui.sat.i8.f64", fn(t_f64) -> t_i8);
-        ifn!("llvm.fptoui.sat.i16.f64", fn(t_f64) -> t_i16);
-        ifn!("llvm.fptoui.sat.i32.f64", fn(t_f64) -> t_i32);
-        ifn!("llvm.fptoui.sat.i64.f64", fn(t_f64) -> t_i64);
-        ifn!("llvm.fptoui.sat.i128.f64", fn(t_f64) -> t_i128);
-
-        ifn!("llvm.trap", fn() -> void);
-        ifn!("llvm.debugtrap", fn() -> void);
-        ifn!("llvm.frameaddress", fn(t_i32) -> ptr);
-
-        ifn!("llvm.powi.f16.i32", fn(t_f16, t_i32) -> t_f16);
-        ifn!("llvm.powi.f32.i32", fn(t_f32, t_i32) -> t_f32);
-        ifn!("llvm.powi.f64.i32", fn(t_f64, t_i32) -> t_f64);
-        ifn!("llvm.powi.f128.i32", fn(t_f128, t_i32) -> t_f128);
-
-        ifn!("llvm.pow.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.pow.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.pow.f64", fn(t_f64, t_f64) -> t_f64);
-        ifn!("llvm.pow.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.sqrt.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.sqrt.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.sqrt.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.sqrt.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.sin.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.sin.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.sin.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.sin.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.cos.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.cos.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.cos.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.cos.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.exp.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.exp.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.exp.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.exp.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.exp2.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.exp2.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.exp2.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.exp2.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.log.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.log.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.log.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.log.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.log10.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.log10.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.log10.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.log10.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.log2.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.log2.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.log2.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.log2.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.fma.f16", fn(t_f16, t_f16, t_f16) -> t_f16);
-        ifn!("llvm.fma.f32", fn(t_f32, t_f32, t_f32) -> t_f32);
-        ifn!("llvm.fma.f64", fn(t_f64, t_f64, t_f64) -> t_f64);
-        ifn!("llvm.fma.f128", fn(t_f128, t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.fmuladd.f16", fn(t_f16, t_f16, t_f16) -> t_f16);
-        ifn!("llvm.fmuladd.f32", fn(t_f32, t_f32, t_f32) -> t_f32);
-        ifn!("llvm.fmuladd.f64", fn(t_f64, t_f64, t_f64) -> t_f64);
-        ifn!("llvm.fmuladd.f128", fn(t_f128, t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.fabs.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.fabs.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.fabs.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.fabs.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.minnum.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.minnum.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.minnum.f64", fn(t_f64, t_f64) -> t_f64);
-        ifn!("llvm.minnum.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.minimum.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.minimum.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.minimum.f64", fn(t_f64, t_f64) -> t_f64);
-        // There are issues on x86_64 and aarch64 with the f128 variant.
-        //  - https://github.com/llvm/llvm-project/issues/139380
-        //  - https://github.com/llvm/llvm-project/issues/139381
-        // ifn!("llvm.minimum.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.maxnum.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.maxnum.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.maxnum.f64", fn(t_f64, t_f64) -> t_f64);
-        ifn!("llvm.maxnum.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.maximum.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.maximum.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.maximum.f64", fn(t_f64, t_f64) -> t_f64);
-        // There are issues on x86_64 and aarch64 with the f128 variant.
-        //  - https://github.com/llvm/llvm-project/issues/139380
-        //  - https://github.com/llvm/llvm-project/issues/139381
-        // ifn!("llvm.maximum.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.floor.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.floor.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.floor.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.floor.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.ceil.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.ceil.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.ceil.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.ceil.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.trunc.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.trunc.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.trunc.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.trunc.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.copysign.f16", fn(t_f16, t_f16) -> t_f16);
-        ifn!("llvm.copysign.f32", fn(t_f32, t_f32) -> t_f32);
-        ifn!("llvm.copysign.f64", fn(t_f64, t_f64) -> t_f64);
-        ifn!("llvm.copysign.f128", fn(t_f128, t_f128) -> t_f128);
-
-        ifn!("llvm.round.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.round.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.round.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.round.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.roundeven.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.roundeven.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.roundeven.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.roundeven.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.rint.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.rint.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.rint.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.rint.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.nearbyint.f16", fn(t_f16) -> t_f16);
-        ifn!("llvm.nearbyint.f32", fn(t_f32) -> t_f32);
-        ifn!("llvm.nearbyint.f64", fn(t_f64) -> t_f64);
-        ifn!("llvm.nearbyint.f128", fn(t_f128) -> t_f128);
-
-        ifn!("llvm.ctpop.i8", fn(t_i8) -> t_i8);
-        ifn!("llvm.ctpop.i16", fn(t_i16) -> t_i16);
-        ifn!("llvm.ctpop.i32", fn(t_i32) -> t_i32);
-        ifn!("llvm.ctpop.i64", fn(t_i64) -> t_i64);
-        ifn!("llvm.ctpop.i128", fn(t_i128) -> t_i128);
-
-        ifn!("llvm.ctlz.i8", fn(t_i8, i1) -> t_i8);
-        ifn!("llvm.ctlz.i16", fn(t_i16, i1) -> t_i16);
-        ifn!("llvm.ctlz.i32", fn(t_i32, i1) -> t_i32);
-        ifn!("llvm.ctlz.i64", fn(t_i64, i1) -> t_i64);
-        ifn!("llvm.ctlz.i128", fn(t_i128, i1) -> t_i128);
-
-        ifn!("llvm.cttz.i8", fn(t_i8, i1) -> t_i8);
-        ifn!("llvm.cttz.i16", fn(t_i16, i1) -> t_i16);
-        ifn!("llvm.cttz.i32", fn(t_i32, i1) -> t_i32);
-        ifn!("llvm.cttz.i64", fn(t_i64, i1) -> t_i64);
-        ifn!("llvm.cttz.i128", fn(t_i128, i1) -> t_i128);
-
-        ifn!("llvm.bswap.i16", fn(t_i16) -> t_i16);
-        ifn!("llvm.bswap.i32", fn(t_i32) -> t_i32);
-        ifn!("llvm.bswap.i64", fn(t_i64) -> t_i64);
-        ifn!("llvm.bswap.i128", fn(t_i128) -> t_i128);
-
-        ifn!("llvm.bitreverse.i8", fn(t_i8) -> t_i8);
-        ifn!("llvm.bitreverse.i16", fn(t_i16) -> t_i16);
-        ifn!("llvm.bitreverse.i32", fn(t_i32) -> t_i32);
-        ifn!("llvm.bitreverse.i64", fn(t_i64) -> t_i64);
-        ifn!("llvm.bitreverse.i128", fn(t_i128) -> t_i128);
-
-        ifn!("llvm.fshl.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
-        ifn!("llvm.fshl.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
-        ifn!("llvm.fshl.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
-        ifn!("llvm.fshl.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
-        ifn!("llvm.fshl.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.fshr.i8", fn(t_i8, t_i8, t_i8) -> t_i8);
-        ifn!("llvm.fshr.i16", fn(t_i16, t_i16, t_i16) -> t_i16);
-        ifn!("llvm.fshr.i32", fn(t_i32, t_i32, t_i32) -> t_i32);
-        ifn!("llvm.fshr.i64", fn(t_i64, t_i64, t_i64) -> t_i64);
-        ifn!("llvm.fshr.i128", fn(t_i128, t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.sadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.sadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.sadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.sadd.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.sadd.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.uadd.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.uadd.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.uadd.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.uadd.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.uadd.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.ssub.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.ssub.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.ssub.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.ssub.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.ssub.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.usub.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.usub.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.usub.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.usub.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.usub.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.smul.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.smul.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.smul.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.smul.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.smul.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.umul.with.overflow.i8", fn(t_i8, t_i8) -> mk_struct! {t_i8, i1});
-        ifn!("llvm.umul.with.overflow.i16", fn(t_i16, t_i16) -> mk_struct! {t_i16, i1});
-        ifn!("llvm.umul.with.overflow.i32", fn(t_i32, t_i32) -> mk_struct! {t_i32, i1});
-        ifn!("llvm.umul.with.overflow.i64", fn(t_i64, t_i64) -> mk_struct! {t_i64, i1});
-        ifn!("llvm.umul.with.overflow.i128", fn(t_i128, t_i128) -> mk_struct! {t_i128, i1});
-
-        ifn!("llvm.sadd.sat.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.sadd.sat.i16", fn(t_i16, t_i16) -> t_i16);
-        ifn!("llvm.sadd.sat.i32", fn(t_i32, t_i32) -> t_i32);
-        ifn!("llvm.sadd.sat.i64", fn(t_i64, t_i64) -> t_i64);
-        ifn!("llvm.sadd.sat.i128", fn(t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.uadd.sat.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.uadd.sat.i16", fn(t_i16, t_i16) -> t_i16);
-        ifn!("llvm.uadd.sat.i32", fn(t_i32, t_i32) -> t_i32);
-        ifn!("llvm.uadd.sat.i64", fn(t_i64, t_i64) -> t_i64);
-        ifn!("llvm.uadd.sat.i128", fn(t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.ssub.sat.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.ssub.sat.i16", fn(t_i16, t_i16) -> t_i16);
-        ifn!("llvm.ssub.sat.i32", fn(t_i32, t_i32) -> t_i32);
-        ifn!("llvm.ssub.sat.i64", fn(t_i64, t_i64) -> t_i64);
-        ifn!("llvm.ssub.sat.i128", fn(t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.usub.sat.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.usub.sat.i16", fn(t_i16, t_i16) -> t_i16);
-        ifn!("llvm.usub.sat.i32", fn(t_i32, t_i32) -> t_i32);
-        ifn!("llvm.usub.sat.i64", fn(t_i64, t_i64) -> t_i64);
-        ifn!("llvm.usub.sat.i128", fn(t_i128, t_i128) -> t_i128);
-
-        ifn!("llvm.scmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.scmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
-        ifn!("llvm.scmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
-        ifn!("llvm.scmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
-        ifn!("llvm.scmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
-
-        ifn!("llvm.ucmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
-        ifn!("llvm.ucmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
-        ifn!("llvm.ucmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
-        ifn!("llvm.ucmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
-        ifn!("llvm.ucmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
-
-        ifn!("llvm.lifetime.start.p0i8", fn(t_i64, ptr) -> void);
-        ifn!("llvm.lifetime.end.p0i8", fn(t_i64, ptr) -> void);
-
-        // FIXME: This is an infinitesimally small portion of the types you can
-        // pass to this intrinsic, if we can ever lazily register intrinsics we
-        // should register these when they're used, that way any type can be
-        // passed.
-        ifn!("llvm.is.constant.i1", fn(i1) -> i1);
-        ifn!("llvm.is.constant.i8", fn(t_i8) -> i1);
-        ifn!("llvm.is.constant.i16", fn(t_i16) -> i1);
-        ifn!("llvm.is.constant.i32", fn(t_i32) -> i1);
-        ifn!("llvm.is.constant.i64", fn(t_i64) -> i1);
-        ifn!("llvm.is.constant.i128", fn(t_i128) -> i1);
-        ifn!("llvm.is.constant.isize", fn(t_isize) -> i1);
-        ifn!("llvm.is.constant.f16", fn(t_f16) -> i1);
-        ifn!("llvm.is.constant.f32", fn(t_f32) -> i1);
-        ifn!("llvm.is.constant.f64", fn(t_f64) -> i1);
-        ifn!("llvm.is.constant.f128", fn(t_f128) -> i1);
-        ifn!("llvm.is.constant.ptr", fn(ptr) -> i1);
-
-        ifn!("llvm.expect.i1", fn(i1, i1) -> i1);
-        ifn!("llvm.eh.typeid.for", fn(ptr) -> t_i32);
-        ifn!("llvm.localescape", fn(...) -> void);
-        ifn!("llvm.localrecover", fn(ptr, ptr, t_i32) -> ptr);
-        ifn!("llvm.x86.seh.recoverfp", fn(ptr, ptr) -> ptr);
-
-        ifn!("llvm.assume", fn(i1) -> void);
-        ifn!("llvm.prefetch", fn(ptr, t_i32, t_i32, t_i32) -> void);
-
+        base_name: Cow<'static, str>,
+        type_params: &[&'ll Type],
+    ) -> (&'ll Type, &'ll Value) {
+        *self
+            .intrinsics
+            .borrow_mut()
+            .entry((base_name, SmallVec::from_slice(type_params)))
+            .or_insert_with_key(|(base_name, type_params)| {
+                self.declare_intrinsic(base_name, type_params)
+            })
+    }
+
+    fn declare_intrinsic(
+        &self,
+        base_name: &str,
+        type_params: &[&'ll Type],
+    ) -> (&'ll Type, &'ll Value) {
         // This isn't an "LLVM intrinsic", but LLVM's optimization passes
         // recognize it like one (including turning it into `bcmp` sometimes)
         // and we use it to implement intrinsics like `raw_eq` and `compare_bytes`
-        match self.sess().target.arch.as_ref() {
-            "avr" | "msp430" => ifn!("memcmp", fn(ptr, ptr, t_isize) -> t_i16),
-            _ => ifn!("memcmp", fn(ptr, ptr, t_isize) -> t_i32),
-        }
-
-        // variadic intrinsics
-        ifn!("llvm.va_start", fn(ptr) -> void);
-        ifn!("llvm.va_end", fn(ptr) -> void);
-        ifn!("llvm.va_copy", fn(ptr, ptr) -> void);
-
-        if self.sess().instrument_coverage() {
-            ifn!("llvm.instrprof.increment", fn(ptr, t_i64, t_i32, t_i32) -> void);
-            ifn!("llvm.instrprof.mcdc.parameters", fn(ptr, t_i64, t_i32) -> void);
-            ifn!("llvm.instrprof.mcdc.tvbitmap.update", fn(ptr, t_i64, t_i32, ptr) -> void);
-        }
-
-        ifn!("llvm.type.test", fn(ptr, t_metadata) -> i1);
-        ifn!("llvm.type.checked.load", fn(ptr, t_i32, t_metadata) -> mk_struct! {ptr, i1});
+        if base_name == "memcmp" {
+            let fn_ty = self
+                .type_func(&[self.type_ptr(), self.type_ptr(), self.type_isize()], self.type_int());
+            let f = self.declare_cfn("memcmp", llvm::UnnamedAddr::No, fn_ty);
 
-        if self.sess().opts.debuginfo != DebugInfo::None {
-            ifn!("llvm.dbg.declare", fn(t_metadata, t_metadata) -> void);
-            ifn!("llvm.dbg.value", fn(t_metadata, t_i64, t_metadata) -> void);
+            return (fn_ty, f);
         }
 
-        ifn!("llvm.ptrmask", fn(ptr, t_isize) -> ptr);
-        ifn!("llvm.threadlocal.address", fn(ptr) -> ptr);
+        let intrinsic = llvm::Intrinsic::lookup(base_name.as_bytes())
+            .unwrap_or_else(|| bug!("Unknown intrinsic: `{base_name}`"));
+        let f = intrinsic.get_declaration(self.llmod, &type_params);
 
-        None
+        (self.get_type_of_global(f), f)
     }
 
     pub(crate) fn eh_catch_typeinfo(&self) -> &'ll Value {
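A standalone sketch of the new lookup-and-cache scheme (strings stand in for LLVM types and values): intrinsics are now keyed by base name plus type parameters instead of by a fully suffixed name, and the hand-maintained `ifn!` table is replaced by asking LLVM itself for the declaration on first use.

    use std::cell::RefCell;
    use std::collections::HashMap;

    #[derive(Default)]
    struct IntrinsicCache {
        // mirrors the new `intrinsics` field keyed by (Cow<str>, SmallVec<&Type>)
        map: RefCell<HashMap<(String, Vec<String>), String>>,
    }

    impl IntrinsicCache {
        fn get_intrinsic(&self, base_name: &str, type_params: &[&str]) -> String {
            self.map
                .borrow_mut()
                .entry((
                    base_name.to_owned(),
                    type_params.iter().map(|t| t.to_string()).collect(),
                ))
                .or_insert_with(|| {
                    // Stands in for `llvm::Intrinsic::lookup(base_name)` followed
                    // by `get_declaration(llmod, type_params)`, which has LLVM
                    // produce the concretely suffixed overload itself.
                    if type_params.is_empty() {
                        base_name.to_owned()
                    } else {
                        format!("{base_name}.{}", type_params.join("."))
                    }
                })
                .clone()
        }
    }

    // e.g. get_intrinsic("llvm.ctpop", &["i32"]) caches and returns a stand-in
    // for the `llvm.ctpop.i32` declaration; real LLVM overload suffix rules are
    // more involved (vectors, pointers), so this is only a model of the caching.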
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index eaafc680712..d50ad8a1a9c 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -3,53 +3,12 @@ use std::path::Path;
 
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_errors::{Diag, DiagCtxtHandle, Diagnostic, EmissionGuarantee, Level};
-use rustc_macros::{Diagnostic, Subdiagnostic};
+use rustc_macros::Diagnostic;
 use rustc_span::Span;
 
 use crate::fluent_generated as fluent;
 
 #[derive(Diagnostic)]
-#[diag(codegen_llvm_unknown_ctarget_feature_prefix)]
-#[note]
-pub(crate) struct UnknownCTargetFeaturePrefix<'a> {
-    pub feature: &'a str,
-}
-
-#[derive(Diagnostic)]
-#[diag(codegen_llvm_unknown_ctarget_feature)]
-#[note]
-pub(crate) struct UnknownCTargetFeature<'a> {
-    pub feature: &'a str,
-    #[subdiagnostic]
-    pub rust_feature: PossibleFeature<'a>,
-}
-
-#[derive(Diagnostic)]
-#[diag(codegen_llvm_unstable_ctarget_feature)]
-#[note]
-pub(crate) struct UnstableCTargetFeature<'a> {
-    pub feature: &'a str,
-}
-
-#[derive(Diagnostic)]
-#[diag(codegen_llvm_forbidden_ctarget_feature)]
-#[note]
-#[note(codegen_llvm_forbidden_ctarget_feature_issue)]
-pub(crate) struct ForbiddenCTargetFeature<'a> {
-    pub feature: &'a str,
-    pub enabled: &'a str,
-    pub reason: &'a str,
-}
-
-#[derive(Subdiagnostic)]
-pub(crate) enum PossibleFeature<'a> {
-    #[help(codegen_llvm_possible_feature)]
-    Some { rust_feature: &'a str },
-    #[help(codegen_llvm_consider_filing_feature_request)]
-    None,
-}
-
-#[derive(Diagnostic)]
 #[diag(codegen_llvm_symbol_already_defined)]
 pub(crate) struct SymbolAlreadyDefined<'a> {
     #[primary_span]
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 10697b9a71f..f7f062849a8 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -15,7 +15,7 @@ use rustc_middle::ty::{self, GenericArgsRef, Ty};
 use rustc_middle::{bug, span_bug};
 use rustc_span::{Span, Symbol, sym};
 use rustc_symbol_mangling::mangle_internal_symbol;
-use rustc_target::spec::{HasTargetSpec, PanicStrategy};
+use rustc_target::spec::PanicStrategy;
 use tracing::debug;
 
 use crate::abi::FnAbiLlvmExt;
@@ -27,137 +27,140 @@ use crate::type_of::LayoutLlvmExt;
 use crate::va_arg::emit_va_arg;
 use crate::value::Value;
 
-fn get_simple_intrinsic<'ll>(
-    cx: &CodegenCx<'ll, '_>,
+fn call_simple_intrinsic<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
     name: Symbol,
-) -> Option<(&'ll Type, &'ll Value)> {
-    let llvm_name = match name {
-        sym::sqrtf16 => "llvm.sqrt.f16",
-        sym::sqrtf32 => "llvm.sqrt.f32",
-        sym::sqrtf64 => "llvm.sqrt.f64",
-        sym::sqrtf128 => "llvm.sqrt.f128",
-
-        sym::powif16 => "llvm.powi.f16.i32",
-        sym::powif32 => "llvm.powi.f32.i32",
-        sym::powif64 => "llvm.powi.f64.i32",
-        sym::powif128 => "llvm.powi.f128.i32",
-
-        sym::sinf16 => "llvm.sin.f16",
-        sym::sinf32 => "llvm.sin.f32",
-        sym::sinf64 => "llvm.sin.f64",
-        sym::sinf128 => "llvm.sin.f128",
-
-        sym::cosf16 => "llvm.cos.f16",
-        sym::cosf32 => "llvm.cos.f32",
-        sym::cosf64 => "llvm.cos.f64",
-        sym::cosf128 => "llvm.cos.f128",
-
-        sym::powf16 => "llvm.pow.f16",
-        sym::powf32 => "llvm.pow.f32",
-        sym::powf64 => "llvm.pow.f64",
-        sym::powf128 => "llvm.pow.f128",
-
-        sym::expf16 => "llvm.exp.f16",
-        sym::expf32 => "llvm.exp.f32",
-        sym::expf64 => "llvm.exp.f64",
-        sym::expf128 => "llvm.exp.f128",
-
-        sym::exp2f16 => "llvm.exp2.f16",
-        sym::exp2f32 => "llvm.exp2.f32",
-        sym::exp2f64 => "llvm.exp2.f64",
-        sym::exp2f128 => "llvm.exp2.f128",
-
-        sym::logf16 => "llvm.log.f16",
-        sym::logf32 => "llvm.log.f32",
-        sym::logf64 => "llvm.log.f64",
-        sym::logf128 => "llvm.log.f128",
-
-        sym::log10f16 => "llvm.log10.f16",
-        sym::log10f32 => "llvm.log10.f32",
-        sym::log10f64 => "llvm.log10.f64",
-        sym::log10f128 => "llvm.log10.f128",
-
-        sym::log2f16 => "llvm.log2.f16",
-        sym::log2f32 => "llvm.log2.f32",
-        sym::log2f64 => "llvm.log2.f64",
-        sym::log2f128 => "llvm.log2.f128",
-
-        sym::fmaf16 => "llvm.fma.f16",
-        sym::fmaf32 => "llvm.fma.f32",
-        sym::fmaf64 => "llvm.fma.f64",
-        sym::fmaf128 => "llvm.fma.f128",
-
-        sym::fmuladdf16 => "llvm.fmuladd.f16",
-        sym::fmuladdf32 => "llvm.fmuladd.f32",
-        sym::fmuladdf64 => "llvm.fmuladd.f64",
-        sym::fmuladdf128 => "llvm.fmuladd.f128",
-
-        sym::fabsf16 => "llvm.fabs.f16",
-        sym::fabsf32 => "llvm.fabs.f32",
-        sym::fabsf64 => "llvm.fabs.f64",
-        sym::fabsf128 => "llvm.fabs.f128",
-
-        sym::minnumf16 => "llvm.minnum.f16",
-        sym::minnumf32 => "llvm.minnum.f32",
-        sym::minnumf64 => "llvm.minnum.f64",
-        sym::minnumf128 => "llvm.minnum.f128",
-
-        sym::minimumf16 => "llvm.minimum.f16",
-        sym::minimumf32 => "llvm.minimum.f32",
-        sym::minimumf64 => "llvm.minimum.f64",
+    args: &[OperandRef<'tcx, &'ll Value>],
+) -> Option<&'ll Value> {
+    let (base_name, type_params): (&'static str, &[&'ll Type]) = match name {
+        sym::sqrtf16 => ("llvm.sqrt", &[bx.type_f16()]),
+        sym::sqrtf32 => ("llvm.sqrt", &[bx.type_f32()]),
+        sym::sqrtf64 => ("llvm.sqrt", &[bx.type_f64()]),
+        sym::sqrtf128 => ("llvm.sqrt", &[bx.type_f128()]),
+
+        sym::powif16 => ("llvm.powi", &[bx.type_f16(), bx.type_i32()]),
+        sym::powif32 => ("llvm.powi", &[bx.type_f32(), bx.type_i32()]),
+        sym::powif64 => ("llvm.powi", &[bx.type_f64(), bx.type_i32()]),
+        sym::powif128 => ("llvm.powi", &[bx.type_f128(), bx.type_i32()]),
+
+        sym::sinf16 => ("llvm.sin", &[bx.type_f16()]),
+        sym::sinf32 => ("llvm.sin", &[bx.type_f32()]),
+        sym::sinf64 => ("llvm.sin", &[bx.type_f64()]),
+        sym::sinf128 => ("llvm.sin", &[bx.type_f128()]),
+
+        sym::cosf16 => ("llvm.cos", &[bx.type_f16()]),
+        sym::cosf32 => ("llvm.cos", &[bx.type_f32()]),
+        sym::cosf64 => ("llvm.cos", &[bx.type_f64()]),
+        sym::cosf128 => ("llvm.cos", &[bx.type_f128()]),
+
+        sym::powf16 => ("llvm.pow", &[bx.type_f16()]),
+        sym::powf32 => ("llvm.pow", &[bx.type_f32()]),
+        sym::powf64 => ("llvm.pow", &[bx.type_f64()]),
+        sym::powf128 => ("llvm.pow", &[bx.type_f128()]),
+
+        sym::expf16 => ("llvm.exp", &[bx.type_f16()]),
+        sym::expf32 => ("llvm.exp", &[bx.type_f32()]),
+        sym::expf64 => ("llvm.exp", &[bx.type_f64()]),
+        sym::expf128 => ("llvm.exp", &[bx.type_f128()]),
+
+        sym::exp2f16 => ("llvm.exp2", &[bx.type_f16()]),
+        sym::exp2f32 => ("llvm.exp2", &[bx.type_f32()]),
+        sym::exp2f64 => ("llvm.exp2", &[bx.type_f64()]),
+        sym::exp2f128 => ("llvm.exp2", &[bx.type_f128()]),
+
+        sym::logf16 => ("llvm.log", &[bx.type_f16()]),
+        sym::logf32 => ("llvm.log", &[bx.type_f32()]),
+        sym::logf64 => ("llvm.log", &[bx.type_f64()]),
+        sym::logf128 => ("llvm.log", &[bx.type_f128()]),
+
+        sym::log10f16 => ("llvm.log10", &[bx.type_f16()]),
+        sym::log10f32 => ("llvm.log10", &[bx.type_f32()]),
+        sym::log10f64 => ("llvm.log10", &[bx.type_f64()]),
+        sym::log10f128 => ("llvm.log10", &[bx.type_f128()]),
+
+        sym::log2f16 => ("llvm.log2", &[bx.type_f16()]),
+        sym::log2f32 => ("llvm.log2", &[bx.type_f32()]),
+        sym::log2f64 => ("llvm.log2", &[bx.type_f64()]),
+        sym::log2f128 => ("llvm.log2", &[bx.type_f128()]),
+
+        sym::fmaf16 => ("llvm.fma", &[bx.type_f16()]),
+        sym::fmaf32 => ("llvm.fma", &[bx.type_f32()]),
+        sym::fmaf64 => ("llvm.fma", &[bx.type_f64()]),
+        sym::fmaf128 => ("llvm.fma", &[bx.type_f128()]),
+
+        sym::fmuladdf16 => ("llvm.fmuladd", &[bx.type_f16()]),
+        sym::fmuladdf32 => ("llvm.fmuladd", &[bx.type_f32()]),
+        sym::fmuladdf64 => ("llvm.fmuladd", &[bx.type_f64()]),
+        sym::fmuladdf128 => ("llvm.fmuladd", &[bx.type_f128()]),
+
+        sym::fabsf16 => ("llvm.fabs", &[bx.type_f16()]),
+        sym::fabsf32 => ("llvm.fabs", &[bx.type_f32()]),
+        sym::fabsf64 => ("llvm.fabs", &[bx.type_f64()]),
+        sym::fabsf128 => ("llvm.fabs", &[bx.type_f128()]),
+
+        sym::minnumf16 => ("llvm.minnum", &[bx.type_f16()]),
+        sym::minnumf32 => ("llvm.minnum", &[bx.type_f32()]),
+        sym::minnumf64 => ("llvm.minnum", &[bx.type_f64()]),
+        sym::minnumf128 => ("llvm.minnum", &[bx.type_f128()]),
+
+        sym::minimumf16 => ("llvm.minimum", &[bx.type_f16()]),
+        sym::minimumf32 => ("llvm.minimum", &[bx.type_f32()]),
+        sym::minimumf64 => ("llvm.minimum", &[bx.type_f64()]),
         // There are issues on x86_64 and aarch64 with the f128 variant,
         // let's instead use the intrinsic fallback body.
-        // sym::minimumf128 => "llvm.minimum.f128",
-        sym::maxnumf16 => "llvm.maxnum.f16",
-        sym::maxnumf32 => "llvm.maxnum.f32",
-        sym::maxnumf64 => "llvm.maxnum.f64",
-        sym::maxnumf128 => "llvm.maxnum.f128",
-
-        sym::maximumf16 => "llvm.maximum.f16",
-        sym::maximumf32 => "llvm.maximum.f32",
-        sym::maximumf64 => "llvm.maximum.f64",
+        // sym::minimumf128 => ("llvm.minimum", &[cx.type_f128()]),
+        sym::maxnumf16 => ("llvm.maxnum", &[bx.type_f16()]),
+        sym::maxnumf32 => ("llvm.maxnum", &[bx.type_f32()]),
+        sym::maxnumf64 => ("llvm.maxnum", &[bx.type_f64()]),
+        sym::maxnumf128 => ("llvm.maxnum", &[bx.type_f128()]),
+
+        sym::maximumf16 => ("llvm.maximum", &[bx.type_f16()]),
+        sym::maximumf32 => ("llvm.maximum", &[bx.type_f32()]),
+        sym::maximumf64 => ("llvm.maximum", &[bx.type_f64()]),
         // There are issues on x86_64 and aarch64 with the f128 variant,
         // let's instead use the intrinsic fallback body.
-        // sym::maximumf128 => "llvm.maximum.f128",
-        sym::copysignf16 => "llvm.copysign.f16",
-        sym::copysignf32 => "llvm.copysign.f32",
-        sym::copysignf64 => "llvm.copysign.f64",
-        sym::copysignf128 => "llvm.copysign.f128",
-
-        sym::floorf16 => "llvm.floor.f16",
-        sym::floorf32 => "llvm.floor.f32",
-        sym::floorf64 => "llvm.floor.f64",
-        sym::floorf128 => "llvm.floor.f128",
-
-        sym::ceilf16 => "llvm.ceil.f16",
-        sym::ceilf32 => "llvm.ceil.f32",
-        sym::ceilf64 => "llvm.ceil.f64",
-        sym::ceilf128 => "llvm.ceil.f128",
-
-        sym::truncf16 => "llvm.trunc.f16",
-        sym::truncf32 => "llvm.trunc.f32",
-        sym::truncf64 => "llvm.trunc.f64",
-        sym::truncf128 => "llvm.trunc.f128",
+        // sym::maximumf128 => ("llvm.maximum", &[cx.type_f128()]),
+        sym::copysignf16 => ("llvm.copysign", &[bx.type_f16()]),
+        sym::copysignf32 => ("llvm.copysign", &[bx.type_f32()]),
+        sym::copysignf64 => ("llvm.copysign", &[bx.type_f64()]),
+        sym::copysignf128 => ("llvm.copysign", &[bx.type_f128()]),
+
+        sym::floorf16 => ("llvm.floor", &[bx.type_f16()]),
+        sym::floorf32 => ("llvm.floor", &[bx.type_f32()]),
+        sym::floorf64 => ("llvm.floor", &[bx.type_f64()]),
+        sym::floorf128 => ("llvm.floor", &[bx.type_f128()]),
+
+        sym::ceilf16 => ("llvm.ceil", &[bx.type_f16()]),
+        sym::ceilf32 => ("llvm.ceil", &[bx.type_f32()]),
+        sym::ceilf64 => ("llvm.ceil", &[bx.type_f64()]),
+        sym::ceilf128 => ("llvm.ceil", &[bx.type_f128()]),
+
+        sym::truncf16 => ("llvm.trunc", &[bx.type_f16()]),
+        sym::truncf32 => ("llvm.trunc", &[bx.type_f32()]),
+        sym::truncf64 => ("llvm.trunc", &[bx.type_f64()]),
+        sym::truncf128 => ("llvm.trunc", &[bx.type_f128()]),
 
         // We could use any of `rint`, `nearbyint`, or `roundeven`
         // for this -- they are all identical in semantics when
         // assuming the default FP environment.
         // `rint` is what we used for $forever.
-        sym::round_ties_even_f16 => "llvm.rint.f16",
-        sym::round_ties_even_f32 => "llvm.rint.f32",
-        sym::round_ties_even_f64 => "llvm.rint.f64",
-        sym::round_ties_even_f128 => "llvm.rint.f128",
+        sym::round_ties_even_f16 => ("llvm.rint", &[bx.type_f16()]),
+        sym::round_ties_even_f32 => ("llvm.rint", &[bx.type_f32()]),
+        sym::round_ties_even_f64 => ("llvm.rint", &[bx.type_f64()]),
+        sym::round_ties_even_f128 => ("llvm.rint", &[bx.type_f128()]),
 
-        sym::roundf16 => "llvm.round.f16",
-        sym::roundf32 => "llvm.round.f32",
-        sym::roundf64 => "llvm.round.f64",
-        sym::roundf128 => "llvm.round.f128",
-
-        sym::ptr_mask => "llvm.ptrmask",
+        sym::roundf16 => ("llvm.round", &[bx.type_f16()]),
+        sym::roundf32 => ("llvm.round", &[bx.type_f32()]),
+        sym::roundf64 => ("llvm.round", &[bx.type_f64()]),
+        sym::roundf128 => ("llvm.round", &[bx.type_f128()]),
 
         _ => return None,
     };
-    Some(cx.get_intrinsic(llvm_name))
+    Some(bx.call_intrinsic(
+        base_name,
+        type_params,
+        &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
+    ))
 }
 
 impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
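
The float-math table above now pairs a base intrinsic name with explicit type parameters instead of baking `.f16`/`.f32`/`.f64`/`.f128` suffixes into the string. A sketch of how one arm is consumed, reusing `bx` and `args` from the surrounding function; the choice of `powif32` is illustrative only:

    // `powif32` carries two overload parameters, in order, so the lookup
    // resolves to the concrete declaration `llvm.powi.f32.i32`.
    let base_name = "llvm.powi";
    let type_params = [bx.type_f32(), bx.type_i32()];
    let call_args = [args[0].immediate(), args[1].immediate()];
    let ret = bx.call_intrinsic(base_name, &type_params, &call_args);
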
@@ -173,36 +176,24 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
         let name = tcx.item_name(instance.def_id());
         let fn_args = instance.args;
 
-        let simple = get_simple_intrinsic(self, name);
+        let simple = call_simple_intrinsic(self, name, args);
         let llval = match name {
-            _ if simple.is_some() => {
-                let (simple_ty, simple_fn) = simple.unwrap();
-                self.call(
-                    simple_ty,
-                    None,
-                    None,
-                    simple_fn,
-                    &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
-                    None,
-                    Some(instance),
+            _ if simple.is_some() => simple.unwrap(),
+            sym::ptr_mask => {
+                let ptr = args[0].immediate();
+                self.call_intrinsic(
+                    "llvm.ptrmask",
+                    &[self.val_ty(ptr), self.type_isize()],
+                    &[ptr, args[1].immediate()],
                 )
             }
             sym::is_val_statically_known => {
-                let intrinsic_type = args[0].layout.immediate_llvm_type(self.cx);
-                let kind = self.type_kind(intrinsic_type);
-                let intrinsic_name = match kind {
-                    TypeKind::Pointer | TypeKind::Integer => {
-                        Some(format!("llvm.is.constant.{intrinsic_type:?}"))
-                    }
-                    // LLVM float types' intrinsic names differ from their type names.
-                    TypeKind::Half => Some(format!("llvm.is.constant.f16")),
-                    TypeKind::Float => Some(format!("llvm.is.constant.f32")),
-                    TypeKind::Double => Some(format!("llvm.is.constant.f64")),
-                    TypeKind::FP128 => Some(format!("llvm.is.constant.f128")),
-                    _ => None,
-                };
-                if let Some(intrinsic_name) = intrinsic_name {
-                    self.call_intrinsic(&intrinsic_name, &[args[0].immediate()])
+                if let OperandValue::Immediate(imm) = args[0].val {
+                    self.call_intrinsic(
+                        "llvm.is.constant",
+                        &[args[0].layout.immediate_llvm_type(self.cx)],
+                        &[imm],
+                    )
                 } else {
                     self.const_bool(false)
                 }
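
For `is_val_statically_known`, the per-type-kind name formatting (`llvm.is.constant.f32`, `.f64`, ...) is gone: the operand's immediate LLVM type is the single overload parameter, so a `u64` operand resolves to `llvm.is.constant.i64` and an `f32` operand to `llvm.is.constant.f32` without any string building. A compressed sketch of the new arm, assuming the Builder `self` and `args[0]` from above:

    if let OperandValue::Immediate(imm) = args[0].val {
        let ty = args[0].layout.immediate_llvm_type(self.cx);
        self.call_intrinsic("llvm.is.constant", &[ty], &[imm])
    } else {
        // Non-immediate operands are conservatively reported as not
        // statically known.
        self.const_bool(false)
    }
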
@@ -246,9 +237,14 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 );
                 return Ok(());
             }
-            sym::breakpoint => self.call_intrinsic("llvm.debugtrap", &[]),
+            sym::breakpoint => self.call_intrinsic("llvm.debugtrap", &[], &[]),
             sym::va_copy => {
-                self.call_intrinsic("llvm.va_copy", &[args[0].immediate(), args[1].immediate()])
+                let dest = args[0].immediate();
+                self.call_intrinsic(
+                    "llvm.va_copy",
+                    &[self.val_ty(dest)],
+                    &[dest, args[1].immediate()],
+                )
             }
             sym::va_arg => {
                 match result.layout.backend_repr {
@@ -322,14 +318,11 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     sym::prefetch_write_instruction => (1, 0),
                     _ => bug!(),
                 };
+                let ptr = args[0].immediate();
                 self.call_intrinsic(
                     "llvm.prefetch",
-                    &[
-                        args[0].immediate(),
-                        self.const_i32(rw),
-                        args[1].immediate(),
-                        self.const_i32(cache_type),
-                    ],
+                    &[self.val_ty(ptr)],
+                    &[ptr, self.const_i32(rw), args[1].immediate(), self.const_i32(cache_type)],
                 )
             }
             sym::carrying_mul_add => {
@@ -385,11 +378,13 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 }
                 let (size, signed) = ty.int_size_and_signed(self.tcx);
                 let width = size.bits();
+                let llty = self.type_ix(width);
                 match name {
                     sym::ctlz | sym::cttz => {
                         let y = self.const_bool(false);
                         let ret = self.call_intrinsic(
-                            &format!("llvm.{name}.i{width}"),
+                            format!("llvm.{name}"),
+                            &[llty],
                             &[args[0].immediate(), y],
                         );
 
@@ -397,62 +392,54 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     }
                     sym::ctlz_nonzero => {
                         let y = self.const_bool(true);
-                        let llvm_name = &format!("llvm.ctlz.i{width}");
-                        let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
+                        let ret =
+                            self.call_intrinsic("llvm.ctlz", &[llty], &[args[0].immediate(), y]);
                         self.intcast(ret, result.layout.llvm_type(self), false)
                     }
                     sym::cttz_nonzero => {
                         let y = self.const_bool(true);
-                        let llvm_name = &format!("llvm.cttz.i{width}");
-                        let ret = self.call_intrinsic(llvm_name, &[args[0].immediate(), y]);
+                        let ret =
+                            self.call_intrinsic("llvm.cttz", &[llty], &[args[0].immediate(), y]);
                         self.intcast(ret, result.layout.llvm_type(self), false)
                     }
                     sym::ctpop => {
-                        let ret = self.call_intrinsic(
-                            &format!("llvm.ctpop.i{width}"),
-                            &[args[0].immediate()],
-                        );
+                        let ret =
+                            self.call_intrinsic("llvm.ctpop", &[llty], &[args[0].immediate()]);
                         self.intcast(ret, result.layout.llvm_type(self), false)
                     }
                     sym::bswap => {
                         if width == 8 {
                             args[0].immediate() // byte swap a u8/i8 is just a no-op
                         } else {
-                            self.call_intrinsic(
-                                &format!("llvm.bswap.i{width}"),
-                                &[args[0].immediate()],
-                            )
+                            self.call_intrinsic("llvm.bswap", &[llty], &[args[0].immediate()])
                         }
                     }
-                    sym::bitreverse => self.call_intrinsic(
-                        &format!("llvm.bitreverse.i{width}"),
-                        &[args[0].immediate()],
-                    ),
+                    sym::bitreverse => {
+                        self.call_intrinsic("llvm.bitreverse", &[llty], &[args[0].immediate()])
+                    }
                     sym::rotate_left | sym::rotate_right => {
                         let is_left = name == sym::rotate_left;
                         let val = args[0].immediate();
                         let raw_shift = args[1].immediate();
                         // rotate = funnel shift with first two args the same
-                        let llvm_name =
-                            &format!("llvm.fsh{}.i{}", if is_left { 'l' } else { 'r' }, width);
+                        let llvm_name = format!("llvm.fsh{}", if is_left { 'l' } else { 'r' });
 
                         // llvm expects shift to be the same type as the values, but rust
                         // always uses `u32`.
                         let raw_shift = self.intcast(raw_shift, self.val_ty(val), false);
 
-                        self.call_intrinsic(llvm_name, &[val, val, raw_shift])
+                        self.call_intrinsic(llvm_name, &[llty], &[val, val, raw_shift])
                     }
                     sym::saturating_add | sym::saturating_sub => {
                         let is_add = name == sym::saturating_add;
                         let lhs = args[0].immediate();
                         let rhs = args[1].immediate();
-                        let llvm_name = &format!(
-                            "llvm.{}{}.sat.i{}",
+                        let llvm_name = format!(
+                            "llvm.{}{}.sat",
                             if signed { 's' } else { 'u' },
                             if is_add { "add" } else { "sub" },
-                            width
                         );
-                        self.call_intrinsic(llvm_name, &[lhs, rhs])
+                        self.call_intrinsic(llvm_name, &[llty], &[lhs, rhs])
                     }
                     _ => bug!(),
                 }
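
All of the scalar integer intrinsics in this hunk follow the same pattern: the element width moves out of the name and into the single `llty` overload parameter. A sketch for a `rotate_left::<u32>` call, using the same helpers as the arm above:

    // The width no longer appears in the name; `llty` (here i32) is the
    // overload parameter, so this resolves to `llvm.fshl.i32`.
    let llty = self.type_ix(32);
    let val = args[0].immediate();
    // LLVM wants the shift amount in the operand type; Rust passes it as
    // u32, so the cast is a no-op here but matters for other widths.
    let raw_shift = self.intcast(args[1].immediate(), self.val_ty(val), false);
    let rotated = self.call_intrinsic("llvm.fshl", &[llty], &[val, val, raw_shift]);
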
@@ -484,11 +471,8 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     self.icmp(IntPredicate::IntEQ, a_val, b_val)
                 } else {
                     let n = self.const_usize(layout.size().bytes());
-                    let cmp = self.call_intrinsic("memcmp", &[a, b, n]);
-                    match self.cx.sess().target.arch.as_ref() {
-                        "avr" | "msp430" => self.icmp(IntPredicate::IntEQ, cmp, self.const_i16(0)),
-                        _ => self.icmp(IntPredicate::IntEQ, cmp, self.const_i32(0)),
-                    }
+                    let cmp = self.call_intrinsic("memcmp", &[], &[a, b, n]);
+                    self.icmp(IntPredicate::IntEQ, cmp, self.const_int(self.type_int(), 0))
                 }
             }
 
@@ -496,6 +480,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
                 let cmp = self.call_intrinsic(
                     "memcmp",
+                    &[],
                     &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
                 );
                 // Some targets have `memcmp` returning `i16`, but the intrinsic is always `i32`.
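
Because `memcmp` is now declared once with the target's C `int` return type (see the `context.rs` hunk above), its result can be compared against a zero of that same type, which is what replaces the old avr/msp430 special case. A sketch of the comparison, assuming the Builder `self` and the operands `a`, `b`, `n` from the arm above:

    // `memcmp` takes no overload parameters; the empty slice marks it as a
    // plain libcall. Zero is built in the declared return type (i16 on
    // avr/msp430, i32 elsewhere), so no per-target match is needed.
    let cmp = self.call_intrinsic("memcmp", &[], &[a, b, n]);
    let is_eq = self.icmp(IntPredicate::IntEQ, cmp, self.const_int(self.type_int(), 0));
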
@@ -619,18 +604,22 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
     }
 
     fn abort(&mut self) {
-        self.call_intrinsic("llvm.trap", &[]);
+        self.call_intrinsic("llvm.trap", &[], &[]);
     }
 
     fn assume(&mut self, val: Self::Value) {
         if self.cx.sess().opts.optimize != rustc_session::config::OptLevel::No {
-            self.call_intrinsic("llvm.assume", &[val]);
+            self.call_intrinsic("llvm.assume", &[], &[val]);
         }
     }
 
     fn expect(&mut self, cond: Self::Value, expected: bool) -> Self::Value {
         if self.cx.sess().opts.optimize != rustc_session::config::OptLevel::No {
-            self.call_intrinsic("llvm.expect.i1", &[cond, self.const_bool(expected)])
+            self.call_intrinsic(
+                "llvm.expect",
+                &[self.type_i1()],
+                &[cond, self.const_bool(expected)],
+            )
         } else {
             cond
         }
@@ -644,17 +633,20 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
     ) -> Self::Value {
         let typeid = self.get_metadata_value(typeid);
         let vtable_byte_offset = self.const_i32(vtable_byte_offset as i32);
-        let type_checked_load =
-            self.call_intrinsic("llvm.type.checked.load", &[llvtable, vtable_byte_offset, typeid]);
+        let type_checked_load = self.call_intrinsic(
+            "llvm.type.checked.load",
+            &[],
+            &[llvtable, vtable_byte_offset, typeid],
+        );
         self.extract_value(type_checked_load, 0)
     }
 
     fn va_start(&mut self, va_list: &'ll Value) -> &'ll Value {
-        self.call_intrinsic("llvm.va_start", &[va_list])
+        self.call_intrinsic("llvm.va_start", &[self.val_ty(va_list)], &[va_list])
     }
 
     fn va_end(&mut self, va_list: &'ll Value) -> &'ll Value {
-        self.call_intrinsic("llvm.va_end", &[va_list])
+        self.call_intrinsic("llvm.va_end", &[self.val_ty(va_list)], &[va_list])
     }
 }
 
@@ -893,8 +885,8 @@ fn codegen_wasm_try<'ll, 'tcx>(
         let null = bx.const_null(bx.type_ptr());
         let funclet = bx.catch_pad(cs, &[null]);
 
-        let ptr = bx.call_intrinsic("llvm.wasm.get.exception", &[funclet.cleanuppad()]);
-        let _sel = bx.call_intrinsic("llvm.wasm.get.ehselector", &[funclet.cleanuppad()]);
+        let ptr = bx.call_intrinsic("llvm.wasm.get.exception", &[], &[funclet.cleanuppad()]);
+        let _sel = bx.call_intrinsic("llvm.wasm.get.ehselector", &[], &[funclet.cleanuppad()]);
 
         let catch_ty = bx.type_func(&[bx.type_ptr(), bx.type_ptr()], bx.type_void());
         bx.call(catch_ty, None, None, catch_func, &[data, ptr], Some(&funclet), None);
@@ -1031,7 +1023,7 @@ fn codegen_emcc_try<'ll, 'tcx>(
         let selector = bx.extract_value(vals, 1);
 
         // Check if the typeid we got is the one for a Rust panic.
-        let rust_typeid = bx.call_intrinsic("llvm.eh.typeid.for", &[tydesc]);
+        let rust_typeid = bx.call_intrinsic("llvm.eh.typeid.for", &[bx.val_ty(tydesc)], &[tydesc]);
         let is_rust_panic = bx.icmp(IntPredicate::IntEQ, selector, rust_typeid);
         let is_rust_panic = bx.zext(is_rust_panic, bx.type_bool());
 
@@ -1522,56 +1514,37 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             }};
         }
 
-        let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() {
-            let elem_ty = bx.cx.type_float_from_ty(*f);
-            match f.bit_width() {
-                16 => ("f16", elem_ty),
-                32 => ("f32", elem_ty),
-                64 => ("f64", elem_ty),
-                128 => ("f128", elem_ty),
-                _ => return_error!(InvalidMonomorphization::FloatingPointVector {
-                    span,
-                    name,
-                    f_ty: *f,
-                    in_ty,
-                }),
-            }
+        let elem_ty = if let ty::Float(f) = in_elem.kind() {
+            bx.cx.type_float_from_ty(*f)
         } else {
             return_error!(InvalidMonomorphization::FloatingPointType { span, name, in_ty });
         };
 
         let vec_ty = bx.type_vector(elem_ty, in_len);
 
-        let (intr_name, fn_ty) = match name {
-            sym::simd_ceil => ("ceil", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fabs => ("fabs", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fcos => ("cos", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fexp2 => ("exp2", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fexp => ("exp", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_flog10 => ("log10", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_flog2 => ("log2", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
-            sym::simd_relaxed_fma => ("fmuladd", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)),
-            sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_fsqrt => ("sqrt", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_round => ("round", bx.type_func(&[vec_ty], vec_ty)),
-            sym::simd_trunc => ("trunc", bx.type_func(&[vec_ty], vec_ty)),
+        let intr_name = match name {
+            sym::simd_ceil => "llvm.ceil",
+            sym::simd_fabs => "llvm.fabs",
+            sym::simd_fcos => "llvm.cos",
+            sym::simd_fexp2 => "llvm.exp2",
+            sym::simd_fexp => "llvm.exp",
+            sym::simd_flog10 => "llvm.log10",
+            sym::simd_flog2 => "llvm.log2",
+            sym::simd_flog => "llvm.log",
+            sym::simd_floor => "llvm.floor",
+            sym::simd_fma => "llvm.fma",
+            sym::simd_relaxed_fma => "llvm.fmuladd",
+            sym::simd_fsin => "llvm.sin",
+            sym::simd_fsqrt => "llvm.sqrt",
+            sym::simd_round => "llvm.round",
+            sym::simd_trunc => "llvm.trunc",
             _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
         };
-        let llvm_name = &format!("llvm.{intr_name}.v{in_len}{elem_ty_str}");
-        let f = bx.declare_cfn(llvm_name, llvm::UnnamedAddr::No, fn_ty);
-        let c = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+        Ok(bx.call_intrinsic(
+            intr_name,
+            &[vec_ty],
             &args.iter().map(|arg| arg.immediate()).collect::<Vec<_>>(),
-            None,
-            None,
-        );
-        Ok(c)
+        ))
     }
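
For the vector float intrinsics the manual name mangling (`llvm.{op}.v{len}{elem}`) and the explicit `declare_cfn`/`call` pair disappear; the whole vector type is the one overload parameter. An illustrative call for `simd_fsqrt` on a `4 x f32` input, using the same builder names as above:

    // The overload parameter is the vector type itself, so this resolves to
    // `llvm.sqrt.v4f32`.
    let vec_ty = bx.type_vector(bx.type_f32(), 4);
    let ret = bx.call_intrinsic("llvm.sqrt", &[vec_ty], &[args[0].immediate()]);
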
 
     if std::matches!(
@@ -1595,29 +1568,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
     }
 
-    // FIXME: use:
-    //  https://github.com/llvm-mirror/llvm/blob/master/include/llvm/IR/Function.h#L182
-    //  https://github.com/llvm-mirror/llvm/blob/master/include/llvm/IR/Intrinsics.h#L81
-    fn llvm_vector_str(bx: &Builder<'_, '_, '_>, elem_ty: Ty<'_>, vec_len: u64) -> String {
-        match *elem_ty.kind() {
-            ty::Int(v) => format!(
-                "v{}i{}",
-                vec_len,
-                // Normalize to prevent crash if v: IntTy::Isize
-                v.normalize(bx.target_spec().pointer_width).bit_width().unwrap()
-            ),
-            ty::Uint(v) => format!(
-                "v{}i{}",
-                vec_len,
-                // Normalize to prevent crash if v: UIntTy::Usize
-                v.normalize(bx.target_spec().pointer_width).bit_width().unwrap()
-            ),
-            ty::Float(v) => format!("v{}f{}", vec_len, v.bit_width()),
-            ty::RawPtr(_, _) => format!("v{}p0", vec_len),
-            _ => unreachable!(),
-        }
-    }
-
     fn llvm_vector_ty<'ll>(cx: &CodegenCx<'ll, '_>, elem_ty: Ty<'_>, vec_len: u64) -> &'ll Type {
         let elem_ty = match *elem_ty.kind() {
             ty::Int(v) => cx.type_int_from_ty(v),
@@ -1698,38 +1648,22 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         // Alignment of T, must be a constant integer value:
-        let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
 
         // Truncate the mask vector to a vector of i1s:
         let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
-        let mask_ty = bx.type_vector(bx.type_i1(), in_len);
 
         // Type of the vector of pointers:
         let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
-        let llvm_pointer_vec_str = llvm_vector_str(bx, element_ty1, in_len);
 
         // Type of the vector of elements:
         let llvm_elem_vec_ty = llvm_vector_ty(bx, element_ty0, in_len);
-        let llvm_elem_vec_str = llvm_vector_str(bx, element_ty0, in_len);
 
-        let llvm_intrinsic =
-            format!("llvm.masked.gather.{llvm_elem_vec_str}.{llvm_pointer_vec_str}");
-        let fn_ty = bx.type_func(
-            &[llvm_pointer_vec_ty, alignment_ty, mask_ty, llvm_elem_vec_ty],
-            llvm_elem_vec_ty,
-        );
-        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+        return Ok(bx.call_intrinsic(
+            "llvm.masked.gather",
+            &[llvm_elem_vec_ty, llvm_pointer_vec_ty],
             &[args[1].immediate(), alignment, mask, args[0].immediate()],
-            None,
-            None,
-        );
-        return Ok(v);
+        ));
     }
 
     if name == sym::simd_masked_load {
@@ -1795,32 +1729,20 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
-        let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
 
         // Alignment of T, must be a constant integer value:
-        let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
 
         let llvm_pointer = bx.type_ptr();
 
         // Type of the vector of elements:
         let llvm_elem_vec_ty = llvm_vector_ty(bx, values_elem, values_len);
-        let llvm_elem_vec_str = llvm_vector_str(bx, values_elem, values_len);
-
-        let llvm_intrinsic = format!("llvm.masked.load.{llvm_elem_vec_str}.p0");
-        let fn_ty = bx
-            .type_func(&[llvm_pointer, alignment_ty, mask_ty, llvm_elem_vec_ty], llvm_elem_vec_ty);
-        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+
+        return Ok(bx.call_intrinsic(
+            "llvm.masked.load",
+            &[llvm_elem_vec_ty, llvm_pointer],
             &[args[1].immediate(), alignment, mask, args[2].immediate()],
-            None,
-            None,
-        );
-        return Ok(v);
+        ));
     }
 
     if name == sym::simd_masked_store {
@@ -1880,33 +1802,20 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         let mask = vector_mask_to_bitmask(bx, args[0].immediate(), m_elem_bitwidth, mask_len);
-        let mask_ty = bx.type_vector(bx.type_i1(), mask_len);
 
         // Alignment of T, must be a constant integer value:
-        let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(values_elem).bytes() as i32);
 
-        let ret_t = bx.type_void();
-
         let llvm_pointer = bx.type_ptr();
 
         // Type of the vector of elements:
         let llvm_elem_vec_ty = llvm_vector_ty(bx, values_elem, values_len);
-        let llvm_elem_vec_str = llvm_vector_str(bx, values_elem, values_len);
-
-        let llvm_intrinsic = format!("llvm.masked.store.{llvm_elem_vec_str}.p0");
-        let fn_ty = bx.type_func(&[llvm_elem_vec_ty, llvm_pointer, alignment_ty, mask_ty], ret_t);
-        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+
+        return Ok(bx.call_intrinsic(
+            "llvm.masked.store",
+            &[llvm_elem_vec_ty, llvm_pointer],
             &[args[2].immediate(), args[1].immediate(), alignment, mask],
-            None,
-            None,
-        );
-        return Ok(v);
+        ));
     }
 
     if name == sym::simd_scatter {
@@ -1971,38 +1880,22 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         );
 
         // Alignment of T, must be a constant integer value:
-        let alignment_ty = bx.type_i32();
         let alignment = bx.const_i32(bx.align_of(in_elem).bytes() as i32);
 
         // Truncate the mask vector to a vector of i1s:
         let mask = vector_mask_to_bitmask(bx, args[2].immediate(), mask_elem_bitwidth, in_len);
-        let mask_ty = bx.type_vector(bx.type_i1(), in_len);
-
-        let ret_t = bx.type_void();
 
         // Type of the vector of pointers:
         let llvm_pointer_vec_ty = llvm_vector_ty(bx, element_ty1, in_len);
-        let llvm_pointer_vec_str = llvm_vector_str(bx, element_ty1, in_len);
 
         // Type of the vector of elements:
         let llvm_elem_vec_ty = llvm_vector_ty(bx, element_ty0, in_len);
-        let llvm_elem_vec_str = llvm_vector_str(bx, element_ty0, in_len);
-
-        let llvm_intrinsic =
-            format!("llvm.masked.scatter.{llvm_elem_vec_str}.{llvm_pointer_vec_str}");
-        let fn_ty =
-            bx.type_func(&[llvm_elem_vec_ty, llvm_pointer_vec_ty, alignment_ty, mask_ty], ret_t);
-        let f = bx.declare_cfn(&llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(
-            fn_ty,
-            None,
-            None,
-            f,
+
+        return Ok(bx.call_intrinsic(
+            "llvm.masked.scatter",
+            &[llvm_elem_vec_ty, llvm_pointer_vec_ty],
             &[args[0].immediate(), args[1].immediate(), alignment, mask],
-            None,
-            None,
-        );
-        return Ok(v);
+        ));
     }
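
The four masked memory intrinsics (gather, load, store, scatter) all drop the hand-assembled `v{len}i{width}`/`p0` suffixes, which is what makes the now-deleted `llvm_vector_str` helper unnecessary: the element-vector type and the pointer (or pointer-vector) type are passed as overload parameters and LLVM derives the mangled name. A sketch for a gather of four `f32`s, where `ptrs`, `alignment`, `mask` and `passthrough` are placeholder values standing in for the operands built above:

    let elem_vec_ty = bx.type_vector(bx.type_f32(), 4);
    let ptr_vec_ty = bx.type_vector(bx.type_ptr(), 4);
    // Resolves to `llvm.masked.gather.v4f32.v4p0` with opaque pointers.
    let gathered = bx.call_intrinsic(
        "llvm.masked.gather",
        &[elem_vec_ty, ptr_vec_ty],
        &[ptrs, alignment, mask, passthrough],
    );
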
 
     macro_rules! arith_red {
@@ -2431,40 +2324,31 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             },
             in_len as u64,
         );
-        let intrinsic_name = match name {
-            sym::simd_bswap => "bswap",
-            sym::simd_bitreverse => "bitreverse",
-            sym::simd_ctlz => "ctlz",
-            sym::simd_ctpop => "ctpop",
-            sym::simd_cttz => "cttz",
+        let llvm_intrinsic = match name {
+            sym::simd_bswap => "llvm.bswap",
+            sym::simd_bitreverse => "llvm.bitreverse",
+            sym::simd_ctlz => "llvm.ctlz",
+            sym::simd_ctpop => "llvm.ctpop",
+            sym::simd_cttz => "llvm.cttz",
             _ => unreachable!(),
         };
         let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();
-        let llvm_intrinsic = &format!("llvm.{}.v{}i{}", intrinsic_name, in_len, int_size,);
 
         return match name {
             // byte swap is no-op for i8/u8
             sym::simd_bswap if int_size == 8 => Ok(args[0].immediate()),
             sym::simd_ctlz | sym::simd_cttz => {
                 // for the (int, i1 immediate) pair, the second arg adds `(0, true) => poison`
-                let fn_ty = bx.type_func(&[vec_ty, bx.type_i1()], vec_ty);
                 let dont_poison_on_zero = bx.const_int(bx.type_i1(), 0);
-                let f = bx.declare_cfn(llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-                Ok(bx.call(
-                    fn_ty,
-                    None,
-                    None,
-                    f,
+                Ok(bx.call_intrinsic(
+                    llvm_intrinsic,
+                    &[vec_ty],
                     &[args[0].immediate(), dont_poison_on_zero],
-                    None,
-                    None,
                 ))
             }
             sym::simd_bswap | sym::simd_bitreverse | sym::simd_ctpop => {
                 // simple unary argument cases
-                let fn_ty = bx.type_func(&[vec_ty], vec_ty);
-                let f = bx.declare_cfn(llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-                Ok(bx.call(fn_ty, None, None, f, &[args[0].immediate()], None, None))
+                Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[args[0].immediate()]))
             }
             _ => unreachable!(),
         };
@@ -2495,10 +2379,9 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         let lhs = args[0].immediate();
         let rhs = args[1].immediate();
         let is_add = name == sym::simd_saturating_add;
-        let ptr_bits = bx.tcx().data_layout.pointer_size.bits() as _;
-        let (signed, elem_width, elem_ty) = match *in_elem.kind() {
-            ty::Int(i) => (true, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_int_from_ty(i)),
-            ty::Uint(i) => (false, i.bit_width().unwrap_or(ptr_bits), bx.cx.type_uint_from_ty(i)),
+        let (signed, elem_ty) = match *in_elem.kind() {
+            ty::Int(i) => (true, bx.cx.type_int_from_ty(i)),
+            ty::Uint(i) => (false, bx.cx.type_uint_from_ty(i)),
             _ => {
                 return_error!(InvalidMonomorphization::ExpectedVectorElementType {
                     span,
@@ -2508,19 +2391,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                 });
             }
         };
-        let llvm_intrinsic = &format!(
-            "llvm.{}{}.sat.v{}i{}",
+        let llvm_intrinsic = format!(
+            "llvm.{}{}.sat",
             if signed { 's' } else { 'u' },
             if is_add { "add" } else { "sub" },
-            in_len,
-            elem_width
         );
         let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
 
-        let fn_ty = bx.type_func(&[vec_ty, vec_ty], vec_ty);
-        let f = bx.declare_cfn(llvm_intrinsic, llvm::UnnamedAddr::No, fn_ty);
-        let v = bx.call(fn_ty, None, None, f, &[lhs, rhs], None, None);
-        return Ok(v);
+        return Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[lhs, rhs]));
     }
 
     span_bug!(span, "unknown SIMD intrinsic");
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 6890923a594..cdfffbe47bf 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -340,18 +340,11 @@ impl CodegenBackend for LlvmCodegenBackend {
         target_config(sess)
     }
 
-    fn codegen_crate<'tcx>(
-        &self,
-        tcx: TyCtxt<'tcx>,
-        metadata: EncodedMetadata,
-        need_metadata_module: bool,
-    ) -> Box<dyn Any> {
+    fn codegen_crate<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Box<dyn Any> {
         Box::new(rustc_codegen_ssa::base::codegen_crate(
             LlvmCodegenBackend(()),
             tcx,
             crate::llvm_util::target_cpu(tcx.sess).to_string(),
-            metadata,
-            need_metadata_module,
         ))
     }
 
@@ -376,14 +369,20 @@ impl CodegenBackend for LlvmCodegenBackend {
         (codegen_results, work_products)
     }
 
-    fn link(&self, sess: &Session, codegen_results: CodegenResults, outputs: &OutputFilenames) {
+    fn link(
+        &self,
+        sess: &Session,
+        codegen_results: CodegenResults,
+        metadata: EncodedMetadata,
+        outputs: &OutputFilenames,
+    ) {
         use rustc_codegen_ssa::back::link::link_binary;
 
         use crate::back::archive::LlvmArchiveBuilderBuilder;
 
         // Run the linker on any artifacts that resulted from the LLVM run.
         // This should produce either a finished executable or library.
-        link_binary(sess, &LlvmArchiveBuilderBuilder, codegen_results, outputs);
+        link_binary(sess, &LlvmArchiveBuilderBuilder, codegen_results, metadata, outputs);
     }
 }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index 2ad39fc8538..b94716b89d6 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -57,14 +57,19 @@ pub(crate) use self::Enzyme_AD::*;
 
 #[cfg(llvm_enzyme)]
 pub(crate) mod Enzyme_AD {
+    use std::ffi::{CString, c_char};
+
     use libc::c_void;
+
     unsafe extern "C" {
         pub(crate) fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8);
+        pub(crate) fn EnzymeSetCLString(arg1: *mut ::std::os::raw::c_void, arg2: *const c_char);
     }
     unsafe extern "C" {
         static mut EnzymePrintPerf: c_void;
         static mut EnzymePrintActivity: c_void;
         static mut EnzymePrintType: c_void;
+        static mut EnzymeFunctionToAnalyze: c_void;
         static mut EnzymePrint: c_void;
         static mut EnzymeStrictAliasing: c_void;
         static mut looseTypeAnalysis: c_void;
@@ -86,6 +91,15 @@ pub(crate) mod Enzyme_AD {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintType), print as u8);
         }
     }
+    pub(crate) fn set_print_type_fun(fun_name: &str) {
+        let c_fun_name = CString::new(fun_name).unwrap();
+        unsafe {
+            EnzymeSetCLString(
+                std::ptr::addr_of_mut!(EnzymeFunctionToAnalyze),
+                c_fun_name.as_ptr() as *const c_char,
+            );
+        }
+    }
     pub(crate) fn set_print(print: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrint), print as u8);
@@ -132,6 +146,9 @@ pub(crate) mod Fallback_AD {
     pub(crate) fn set_print_type(print: bool) {
         unimplemented!()
     }
+    pub(crate) fn set_print_type_fun(fun_name: &str) {
+        unimplemented!()
+    }
     pub(crate) fn set_print(print: bool) {
         unimplemented!()
     }
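
The new `EnzymeSetCLString` binding and its `set_print_type_fun` wrapper let the autodiff driver point Enzyme's type-analysis printing at a single function, complementing the existing boolean `set_print_type` knob. A hypothetical usage sketch; the mangled name is a placeholder, not taken from the patch:

    // Enable type-analysis printing, restricted to one function of interest.
    set_print_type(true);
    set_print_type_fun("_ZN7example6square17h0123456789abcdefE");
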
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index e27fbf94f34..91ada856d59 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -15,6 +15,7 @@
 
 use std::fmt::Debug;
 use std::marker::PhantomData;
+use std::num::NonZero;
 use std::ptr;
 
 use bitflags::bitflags;
@@ -1077,8 +1078,6 @@ unsafe extern "C" {
 
     // Operations on other types
     pub(crate) fn LLVMVoidTypeInContext(C: &Context) -> &Type;
-    pub(crate) fn LLVMTokenTypeInContext(C: &Context) -> &Type;
-    pub(crate) fn LLVMMetadataTypeInContext(C: &Context) -> &Type;
 
     // Operations on all values
     pub(crate) fn LLVMTypeOf(Val: &Value) -> &Type;
@@ -1195,6 +1194,15 @@ unsafe extern "C" {
     // Operations on functions
     pub(crate) fn LLVMSetFunctionCallConv(Fn: &Value, CC: c_uint);
 
+    // Operations about llvm intrinsics
+    pub(crate) fn LLVMLookupIntrinsicID(Name: *const c_char, NameLen: size_t) -> c_uint;
+    pub(crate) fn LLVMGetIntrinsicDeclaration<'a>(
+        Mod: &'a Module,
+        ID: NonZero<c_uint>,
+        ParamTypes: *const &'a Type,
+        ParamCount: size_t,
+    ) -> &'a Value;
+
     // Operations on parameters
     pub(crate) fn LLVMIsAArgument(Val: &Value) -> Option<&Value>;
     pub(crate) safe fn LLVMCountParams(Fn: &Value) -> c_uint;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
index ed23f911930..661174a80df 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs
@@ -1,6 +1,7 @@
 #![allow(non_snake_case)]
 
 use std::ffi::{CStr, CString};
+use std::num::NonZero;
 use std::ptr;
 use std::str::FromStr;
 use std::string::FromUtf8Error;
@@ -327,6 +328,28 @@ pub(crate) fn get_value_name(value: &Value) -> &[u8] {
     }
 }
 
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct Intrinsic {
+    id: NonZero<c_uint>,
+}
+
+impl Intrinsic {
+    pub(crate) fn lookup(name: &[u8]) -> Option<Self> {
+        let id = unsafe { LLVMLookupIntrinsicID(name.as_c_char_ptr(), name.len()) };
+        NonZero::new(id).map(|id| Self { id })
+    }
+
+    pub(crate) fn get_declaration<'ll>(
+        self,
+        llmod: &'ll Module,
+        type_params: &[&'ll Type],
+    ) -> &'ll Value {
+        unsafe {
+            LLVMGetIntrinsicDeclaration(llmod, self.id, type_params.as_ptr(), type_params.len())
+        }
+    }
+}
+
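
The `Intrinsic` wrapper stores LLVM's intrinsic ID rather than a declared function: ID 0 means "not an intrinsic", which is why the field is a `NonZero` and `lookup` returns an `Option`. A hedged sketch of the intended use, assuming an `llmod: &Module` and an `f32_ty: &Type` are in scope:

    let sqrt = Intrinsic::lookup(b"llvm.sqrt").expect("known intrinsic");
    // Unknown names come back as `None` because LLVM reports ID 0 for them.
    assert!(Intrinsic::lookup(b"not.an.intrinsic").is_none());
    // Materialize (or reuse) the declaration of the f32 overload,
    // i.e. `llvm.sqrt.f32`, in the given module.
    let decl = sqrt.get_declaration(llmod, &[f32_ty]);
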
 /// Safe wrapper for `LLVMSetValueName2` from a byte slice
 pub(crate) fn set_value_name(value: &Value, name: &[u8]) {
     unsafe {
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 9718c95f38a..6fd07d562af 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -6,27 +6,20 @@ use std::sync::Once;
 use std::{ptr, slice, str};
 
 use libc::c_int;
-use rustc_codegen_ssa::TargetConfig;
 use rustc_codegen_ssa::base::wants_wasm_eh;
-use rustc_codegen_ssa::codegen_attrs::check_tied_features;
-use rustc_data_structures::fx::{FxHashMap, FxHashSet};
+use rustc_codegen_ssa::target_features::cfg_target_feature;
+use rustc_codegen_ssa::{TargetConfig, target_features};
+use rustc_data_structures::fx::FxHashSet;
 use rustc_data_structures::small_c_str::SmallCStr;
-use rustc_data_structures::unord::UnordSet;
 use rustc_fs_util::path_to_c_string;
 use rustc_middle::bug;
 use rustc_session::Session;
 use rustc_session::config::{PrintKind, PrintRequest};
-use rustc_span::Symbol;
 use rustc_target::spec::{MergeFunctions, PanicStrategy, SmallDataThresholdSupport};
-use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATURES};
 use smallvec::{SmallVec, smallvec};
 
 use crate::back::write::create_informational_target_machine;
-use crate::errors::{
-    FixedX18InvalidArch, ForbiddenCTargetFeature, PossibleFeature, UnknownCTargetFeature,
-    UnknownCTargetFeaturePrefix, UnstableCTargetFeature,
-};
-use crate::llvm;
+use crate::{errors, llvm};
 
 static INIT: Once = Once::new();
 
@@ -195,15 +188,6 @@ impl<'a> LLVMFeature<'a> {
     ) -> Self {
         Self { llvm_feature_name, dependencies }
     }
-
-    fn contains(&'a self, feat: &str) -> bool {
-        self.iter().any(|dep| dep == feat)
-    }
-
-    fn iter(&'a self) -> impl Iterator<Item = &'a str> {
-        let dependencies = self.dependencies.iter().map(|feat| feat.as_str());
-        std::iter::once(self.llvm_feature_name).chain(dependencies)
-    }
 }
 
 impl<'a> IntoIterator for LLVMFeature<'a> {
@@ -216,18 +200,22 @@ impl<'a> IntoIterator for LLVMFeature<'a> {
     }
 }
 
-// WARNING: the features after applying `to_llvm_features` must be known
-// to LLVM or the feature detection code will walk past the end of the feature
-// array, leading to crashes.
-//
-// To find a list of LLVM's names, see llvm-project/llvm/lib/Target/{ARCH}/*.td
-// where `{ARCH}` is the architecture name. Look for instances of `SubtargetFeature`.
-//
-// Check the current rustc fork of LLVM in the repo at https://github.com/rust-lang/llvm-project/.
-// The commit in use can be found via the `llvm-project` submodule in
-// https://github.com/rust-lang/rust/tree/master/src Though note that Rust can also be build with
-// an external precompiled version of LLVM which might lead to failures if the oldest tested /
-// supported LLVM version doesn't yet support the relevant intrinsics.
+/// Convert a Rust feature name to an LLVM feature name. Returning `None` means the
+/// feature should be skipped, usually because it is not supported by the current
+/// LLVM version.
+///
+/// WARNING: the features after applying `to_llvm_features` must be known
+/// to LLVM or the feature detection code will walk past the end of the feature
+/// array, leading to crashes.
+///
+/// To find a list of LLVM's names, see llvm-project/llvm/lib/Target/{ARCH}/*.td
+/// where `{ARCH}` is the architecture name. Look for instances of `SubtargetFeature`.
+///
+/// Check the current rustc fork of LLVM in the repo at
+/// <https://github.com/rust-lang/llvm-project/>. The commit in use can be found via the
+/// `llvm-project` submodule in <https://github.com/rust-lang/rust/tree/master/src>. Note that Rust can
+/// also be built with an external precompiled version of LLVM, which might lead to failures if the
+/// oldest tested / supported LLVM version doesn't yet support the relevant intrinsics.
 pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFeature<'a>> {
     let arch = if sess.target.arch == "x86_64" {
         "x86"
@@ -343,98 +331,25 @@ pub(crate) fn target_config(sess: &Session) -> TargetConfig {
     // the target CPU, that is still expanded to target features (with all their implied features)
     // by LLVM.
     let target_machine = create_informational_target_machine(sess, true);
-    // Compute which of the known target features are enabled in the 'base' target machine. We only
-    // consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
-    let mut features: FxHashSet<Symbol> = sess
-        .target
-        .rust_target_features()
-        .iter()
-        .filter(|(feature, _, _)| {
-            // skip checking special features, as LLVM may not understand them
-            if RUSTC_SPECIAL_FEATURES.contains(feature) {
-                return true;
-            }
-            if let Some(feat) = to_llvm_features(sess, feature) {
-                for llvm_feature in feat {
-                    let cstr = SmallCStr::new(llvm_feature);
-                    // `LLVMRustHasFeature` is moderately expensive. On targets with many
-                    // features (e.g. x86) these calls take a non-trivial fraction of runtime
-                    // when compiling very small programs.
-                    if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } {
-                        return false;
-                    }
+
+    let (unstable_target_features, target_features) = cfg_target_feature(sess, |feature| {
+        if let Some(feat) = to_llvm_features(sess, feature) {
+            // All the LLVM features this expands to must be enabled.
+            for llvm_feature in feat {
+                let cstr = SmallCStr::new(llvm_feature);
+                // `LLVMRustHasFeature` is moderately expensive. On targets with many
+                // features (e.g. x86) these calls take a non-trivial fraction of runtime
+                // when compiling very small programs.
+                if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } {
+                    return false;
                 }
-                true
-            } else {
-                false
             }
-        })
-        .map(|(feature, _, _)| Symbol::intern(feature))
-        .collect();
-
-    // Add enabled and remove disabled features.
-    for (enabled, feature) in
-        sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
-            Some('+') => Some((true, Symbol::intern(&s[1..]))),
-            Some('-') => Some((false, Symbol::intern(&s[1..]))),
-            _ => None,
-        })
-    {
-        if enabled {
-            // Also add all transitively implied features.
-
-            // We don't care about the order in `features` since the only thing we use it for is the
-            // `features.contains` below.
-            #[allow(rustc::potential_query_instability)]
-            features.extend(
-                sess.target
-                    .implied_target_features(feature.as_str())
-                    .iter()
-                    .map(|s| Symbol::intern(s)),
-            );
+            true
         } else {
-            // Remove transitively reverse-implied features.
-
-            // We don't care about the order in `features` since the only thing we use it for is the
-            // `features.contains` below.
-            #[allow(rustc::potential_query_instability)]
-            features.retain(|f| {
-                if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) {
-                    // If `f` if implies `feature`, then `!feature` implies `!f`, so we have to
-                    // remove `f`. (This is the standard logical contraposition principle.)
-                    false
-                } else {
-                    // We can keep `f`.
-                    true
-                }
-            });
+            false
         }
-    }
+    });
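
Feature-`cfg` computation is now shared with other backends through `cfg_target_feature`; the LLVM backend only supplies the predicate that says whether the base target machine already enables a given Rust feature, by checking every LLVM feature it expands to. The shape of that contract in isolation (the `"sse2"` stand-in is illustrative only):

    // `cfg_target_feature` returns the unstable and stable feature sets; the
    // closure is a per-feature predicate against the base target machine.
    let (unstable_target_features, target_features) =
        cfg_target_feature(sess, |feature| feature == "sse2");
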
 
-    // Filter enabled features based on feature gates.
-    let f = |allow_unstable| {
-        sess.target
-            .rust_target_features()
-            .iter()
-            .filter_map(|(feature, gate, _)| {
-                // The `allow_unstable` set is used by rustc internally to determined which target
-                // features are truly available, so we want to return even perma-unstable
-                // "forbidden" features.
-                if allow_unstable
-                    || (gate.in_cfg()
-                        && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
-                {
-                    Some(Symbol::intern(feature))
-                } else {
-                    None
-                }
-            })
-            .filter(|feature| features.contains(&feature))
-            .collect()
-    };
-
-    let target_features = f(false);
-    let unstable_target_features = f(true);
     let mut cfg = TargetConfig {
         target_features,
         unstable_target_features,
@@ -707,6 +622,20 @@ pub(crate) fn target_cpu(sess: &Session) -> &str {
     handle_native(cpu_name)
 }
 
+/// Compute the LLVM target features enabled by compiler flags other than `-Ctarget-feature`.
+fn llvm_features_by_flags(sess: &Session, features: &mut Vec<String>) {
+    target_features::retpoline_features_by_flags(sess, features);
+
+    // -Zfixed-x18
+    if sess.opts.unstable_opts.fixed_x18 {
+        if sess.target.arch != "aarch64" {
+            sess.dcx().emit_fatal(errors::FixedX18InvalidArch { arch: &sess.target.arch });
+        } else {
+            features.push("+reserve-x18".into());
+        }
+    }
+}
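
`llvm_features_by_flags` gathers the features implied by flags other than `-Ctarget-feature` (currently the retpoline flags and `-Zfixed-x18`) in one place instead of interleaving them with the `-Ctarget-feature` parsing. A sketch of the call site:

    // Flag-derived features are appended to the same list that later
    // receives the `-Ctarget-feature` entries; with `-Zfixed-x18` on
    // aarch64 this adds "+reserve-x18".
    let mut features: Vec<String> = Vec::new();
    llvm_features_by_flags(sess, &mut features);
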
+
 /// The list of LLVM features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`,
 /// `--target` and similar).
 pub(crate) fn global_llvm_features(
@@ -771,6 +700,8 @@ pub(crate) fn global_llvm_features(
             .split(',')
             .filter(|v| !v.is_empty())
             // Drop +v8plus feature introduced in LLVM 20.
+            // (Hard-coded target features do not go through `to_llvm_features` since they already
+            // are LLVM feature names, hence we need a special case here.)
             .filter(|v| *v != "+v8plus" || get_version() >= (20, 0, 0))
             .map(String::from),
     );
@@ -781,97 +712,23 @@ pub(crate) fn global_llvm_features(
 
     // -Ctarget-features
     if !only_base_features {
-        let known_features = sess.target.rust_target_features();
-        // Will only be filled when `diagnostics` is set!
-        let mut featsmap = FxHashMap::default();
-
-        // Compute implied features
-        let mut all_rust_features = vec![];
-        for feature in sess.opts.cg.target_feature.split(',') {
-            if let Some(feature) = feature.strip_prefix('+') {
-                all_rust_features.extend(
-                    UnordSet::from(sess.target.implied_target_features(feature))
-                        .to_sorted_stable_ord()
-                        .iter()
-                        .map(|&&s| (true, s)),
-                )
-            } else if let Some(feature) = feature.strip_prefix('-') {
-                // FIXME: Why do we not remove implied features on "-" here?
-                // We do the equivalent above in `target_config`.
-                // See <https://github.com/rust-lang/rust/issues/134792>.
-                all_rust_features.push((false, feature));
-            } else if !feature.is_empty() {
-                if diagnostics {
-                    sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature });
-                }
-            }
-        }
-        // Remove features that are meant for rustc, not LLVM.
-        all_rust_features.retain(|(_, feature)| {
-            // Retain if it is not a rustc feature
-            !RUSTC_SPECIFIC_FEATURES.contains(feature)
-        });
-
-        // Check feature validity.
-        if diagnostics {
-            for &(enable, feature) in &all_rust_features {
-                let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature);
-                match feature_state {
-                    None => {
-                        let rust_feature =
-                            known_features.iter().find_map(|&(rust_feature, _, _)| {
-                                let llvm_features = to_llvm_features(sess, rust_feature)?;
-                                if llvm_features.contains(feature)
-                                    && !llvm_features.contains(rust_feature)
-                                {
-                                    Some(rust_feature)
-                                } else {
-                                    None
-                                }
-                            });
-                        let unknown_feature = if let Some(rust_feature) = rust_feature {
-                            UnknownCTargetFeature {
-                                feature,
-                                rust_feature: PossibleFeature::Some { rust_feature },
-                            }
-                        } else {
-                            UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None }
-                        };
-                        sess.dcx().emit_warn(unknown_feature);
-                    }
-                    Some((_, stability, _)) => {
-                        if let Err(reason) = stability.toggle_allowed() {
-                            sess.dcx().emit_warn(ForbiddenCTargetFeature {
-                                feature,
-                                enabled: if enable { "enabled" } else { "disabled" },
-                                reason,
-                            });
-                        } else if stability.requires_nightly().is_some() {
-                            // An unstable feature. Warn about using it. It makes little sense
-                            // to hard-error here since we just warn about fully unknown
-                            // features above.
-                            sess.dcx().emit_warn(UnstableCTargetFeature { feature });
-                        }
-                    }
-                }
-
-                // FIXME(nagisa): figure out how to not allocate a full hashset here.
-                featsmap.insert(feature, enable);
-            }
-        }
-
-        // Translate this into LLVM features.
-        let feats = all_rust_features
-            .iter()
-            .filter_map(|&(enable, feature)| {
+        target_features::flag_to_backend_features(
+            sess,
+            diagnostics,
+            |feature| {
+                to_llvm_features(sess, feature)
+                    .map(|f| SmallVec::<[&str; 2]>::from_iter(f.into_iter()))
+                    .unwrap_or_default()
+            },
+            |feature, enable| {
                 let enable_disable = if enable { '+' } else { '-' };
                 // We run through `to_llvm_features` when
                 // passing requests down to LLVM. This means that all in-language
                 // features also work on the command line instead of having two
                 // different names when the LLVM name and the Rust name differ.
-                let llvm_feature = to_llvm_features(sess, feature)?;
+                let Some(llvm_feature) = to_llvm_features(sess, feature) else { return };
 
-                Some(
+                features.extend(
                     std::iter::once(format!(
                         "{}{}",
                         enable_disable, llvm_feature.llvm_feature_name
@@ -886,27 +743,12 @@ pub(crate) fn global_llvm_features(
                         },
                     )),
                 )
-            })
-            .flatten();
-        features.extend(feats);
-
-        if diagnostics && let Some(f) = check_tied_features(sess, &featsmap) {
-            sess.dcx().emit_err(rustc_codegen_ssa::errors::TargetFeatureDisableOrEnable {
-                features: f,
-                span: None,
-                missing_features: None,
-            });
-        }
+            },
+        );
     }
 
-    // -Zfixed-x18
-    if sess.opts.unstable_opts.fixed_x18 {
-        if sess.target.arch != "aarch64" {
-            sess.dcx().emit_fatal(FixedX18InvalidArch { arch: &sess.target.arch });
-        } else {
-            features.push("+reserve-x18".into());
-        }
-    }
+    // We add this in the "base target" so that these show up in `sess.unstable_target_features`.
+    llvm_features_by_flags(sess, &mut features);
 
     features
 }
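
For readers without the definition of `target_features::flag_to_backend_features` at hand (it lives outside this diff, presumably in the shared `rustc_codegen_ssa` helpers), the toy below sketches the end-to-end effect of the two callbacks passed to it above: the `-Ctarget-feature` string is split on `,`, each entry's `+`/`-` prefix decides enable/disable, and the Rust-level name expands into one or more backend feature names. The callback-based API, the real name tables, and the diagnostics are all elided; the mappings shown are illustrative examples only.

    // Illustrative stand-in for `to_llvm_features`: one Rust feature name can
    // expand to one or several backend feature names.
    fn to_backend_names(rust_feature: &str) -> Vec<String> {
        match rust_feature {
            "pclmulqdq" => vec!["pclmul".into()],
            "fast-unaligned-access" => {
                vec!["unaligned-scalar-mem".into(), "unaligned-vector-mem".into()]
            }
            other => vec![other.into()],
        }
    }

    fn flag_to_backend_features(flag: &str, features: &mut Vec<String>) {
        for spec in flag.split(',').filter(|s| !s.is_empty()) {
            // A leading `+` enables the feature, a leading `-` disables it.
            let (enable, name) = if let Some(name) = spec.strip_prefix('+') {
                (true, name)
            } else if let Some(name) = spec.strip_prefix('-') {
                (false, name)
            } else {
                continue; // the real code warns about the missing prefix
            };
            let sign = if enable { '+' } else { '-' };
            features.extend(to_backend_names(name).into_iter().map(|n| format!("{sign}{n}")));
        }
    }

    fn main() {
        let mut features = Vec::new();
        flag_to_backend_features("+pclmulqdq,-fast-unaligned-access", &mut features);
        assert_eq!(features, ["+pclmul", "-unaligned-scalar-mem", "-unaligned-vector-mem"]);
    }
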
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
index 169036f5152..453eca2bbe1 100644
--- a/compiler/rustc_codegen_llvm/src/type_.rs
+++ b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -1,4 +1,5 @@
 use std::borrow::Borrow;
+use std::hash::{Hash, Hasher};
 use std::{fmt, ptr};
 
 use libc::{c_char, c_uint};
@@ -25,6 +26,14 @@ impl PartialEq for Type {
     }
 }
 
+impl Eq for Type {}
+
+impl Hash for Type {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        ptr::hash(self, state);
+    }
+}
+
 impl fmt::Debug for Type {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.write_str(
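
The new `Eq` and `Hash` impls make `&'ll Type` usable as a key in hashed collections. Assuming the existing `PartialEq for Type` (only its closing braces are visible in this hunk) compares by address, `ptr::hash` keeps `Hash` consistent with `Eq`, as the `a == b` implies `hash(a) == hash(b)` contract requires. A self-contained sketch of the same pattern on a toy type:

    use std::collections::HashSet;
    use std::hash::{Hash, Hasher};
    use std::ptr;

    // Toy opaque handle: equality is identity (same address), so hashing the
    // address keeps Hash consistent with Eq.
    struct Opaque(u8); // non-zero-sized so distinct locals have distinct addresses

    impl PartialEq for Opaque {
        fn eq(&self, other: &Self) -> bool {
            ptr::eq(self, other)
        }
    }
    impl Eq for Opaque {}
    impl Hash for Opaque {
        fn hash<H: Hasher>(&self, state: &mut H) {
            ptr::hash(self, state);
        }
    }

    fn main() {
        let a = Opaque(0);
        let b = Opaque(0);
        let mut set: HashSet<&Opaque> = HashSet::new();
        set.insert(&a);
        set.insert(&a); // same address: still a single entry
        set.insert(&b); // same payload, different address: a second entry
        assert_eq!(set.len(), 2);
    }
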
@@ -49,13 +58,6 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
     pub(crate) fn type_void(&self) -> &'ll Type {
         unsafe { llvm::LLVMVoidTypeInContext(self.llcx()) }
     }
-    pub(crate) fn type_token(&self) -> &'ll Type {
-        unsafe { llvm::LLVMTokenTypeInContext(self.llcx()) }
-    }
-
-    pub(crate) fn type_metadata(&self) -> &'ll Type {
-        unsafe { llvm::LLVMMetadataTypeInContext(self.llcx()) }
-    }
 
     /// Creates an integer type with the given number of bits, e.g., i24
     pub(crate) fn type_ix(&self, num_bits: u64) -> &'ll Type {
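
The helpers that remain here are thin wrappers over LLVM-C type constructors; the removed `type_token`/`type_metadata` simply had no callers left. As a hedged illustration only (this is not code from this crate, and it assumes the separate `llvm-sys` crate, whose `core` module mirrors `llvm-c/Core.h`), an arbitrary-width integer type like the `i24` from the doc comment can be created directly through that C API:

    use llvm_sys::core::{
        LLVMContextCreate, LLVMContextDispose, LLVMGetIntTypeWidth, LLVMIntTypeInContext,
    };

    fn main() {
        unsafe {
            let ctx = LLVMContextCreate();
            // LLVM integer types carry an arbitrary bit width, e.g. an i24.
            let i24 = LLVMIntTypeInContext(ctx, 24);
            assert_eq!(LLVMGetIntTypeWidth(i24), 24);
            LLVMContextDispose(ctx);
        }
    }
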