Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
-rw-r--r--  compiler/rustc_codegen_llvm/src/abi.rs                |  2
-rw-r--r--  compiler/rustc_codegen_llvm/src/allocator.rs          | 45
-rw-r--r--  compiler/rustc_codegen_llvm/src/attributes.rs         | 22
-rw-r--r--  compiler/rustc_codegen_llvm/src/back/lto.rs           |  6
-rw-r--r--  compiler/rustc_codegen_llvm/src/builder.rs            |  3
-rw-r--r--  compiler/rustc_codegen_llvm/src/common.rs             |  4
-rw-r--r--  compiler/rustc_codegen_llvm/src/consts.rs             | 11
-rw-r--r--  compiler/rustc_codegen_llvm/src/context.rs            |  2
-rw-r--r--  compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs | 14
-rw-r--r--  compiler/rustc_codegen_llvm/src/intrinsic.rs          | 63
-rw-r--r--  compiler/rustc_codegen_llvm/src/lib.rs                |  2
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs    | 20
-rw-r--r--  compiler/rustc_codegen_llvm/src/llvm_util.rs          |  8
-rw-r--r--  compiler/rustc_codegen_llvm/src/type_.rs              |  4
-rw-r--r--  compiler/rustc_codegen_llvm/src/va_arg.rs             | 60
15 files changed, 164 insertions, 102 deletions
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 4b07c8aef91..009e7e2487b 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -146,7 +146,7 @@ impl LlvmType for CastTarget {
             "total size {:?} cannot be divided into units of zero size",
             self.rest.total
         );
-        if self.rest.total.bytes() % self.rest.unit.size.bytes() != 0 {
+        if !self.rest.total.bytes().is_multiple_of(self.rest.unit.size.bytes()) {
             assert_eq!(self.rest.unit.kind, RegKind::Integer, "only int regs can be split");
         }
         self.rest.total.bytes().div_ceil(self.rest.unit.size.bytes())
diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs
index 9dca63cfc8d..2b5090ed6db 100644
--- a/compiler/rustc_codegen_llvm/src/allocator.rs
+++ b/compiler/rustc_codegen_llvm/src/allocator.rs
@@ -11,7 +11,7 @@ use rustc_symbol_mangling::mangle_internal_symbol;
 
 use crate::builder::SBuilder;
 use crate::declare::declare_simple_fn;
-use crate::llvm::{self, False, True, Type};
+use crate::llvm::{self, False, True, Type, Value};
 use crate::{SimpleCx, attributes, debuginfo};
 
 pub(crate) unsafe fn codegen(
@@ -73,13 +73,14 @@ pub(crate) unsafe fn codegen(
     );
 
     unsafe {
-        // __rust_alloc_error_handler_should_panic
-        let name = mangle_internal_symbol(tcx, OomStrategy::SYMBOL);
-        let ll_g = cx.declare_global(&name, i8);
-        llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility()));
-        let val = tcx.sess.opts.unstable_opts.oom.should_panic();
-        let llval = llvm::LLVMConstInt(i8, val as u64, False);
-        llvm::set_initializer(ll_g, llval);
+        // __rust_alloc_error_handler_should_panic_v2
+        create_const_value_function(
+            tcx,
+            &cx,
+            &mangle_internal_symbol(tcx, OomStrategy::SYMBOL),
+            &i8,
+            &llvm::LLVMConstInt(i8, tcx.sess.opts.unstable_opts.oom.should_panic() as u64, False),
+        );
 
         // __rust_no_alloc_shim_is_unstable_v2
         create_wrapper_function(
@@ -100,6 +101,34 @@ pub(crate) unsafe fn codegen(
     }
 }
 
+fn create_const_value_function(
+    tcx: TyCtxt<'_>,
+    cx: &SimpleCx<'_>,
+    name: &str,
+    output: &Type,
+    value: &Value,
+) {
+    let ty = cx.type_func(&[], output);
+    let llfn = declare_simple_fn(
+        &cx,
+        name,
+        llvm::CallConv::CCallConv,
+        llvm::UnnamedAddr::Global,
+        llvm::Visibility::from_generic(tcx.sess.default_visibility()),
+        ty,
+    );
+
+    attributes::apply_to_llfn(
+        llfn,
+        llvm::AttributePlace::Function,
+        &[llvm::AttributeKind::AlwaysInline.create_attr(cx.llcx)],
+    );
+
+    let llbb = unsafe { llvm::LLVMAppendBasicBlockInContext(cx.llcx, llfn, c"entry".as_ptr()) };
+    let mut bx = SBuilder::build(&cx, llbb);
+    bx.ret(value);
+}
+
 fn create_wrapper_function(
     tcx: TyCtxt<'_>,
     cx: &SimpleCx<'_>,
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index 27fd09745ff..1ea5a062254 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -370,22 +370,6 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
     };
     to_add.extend(inline_attr(cx, inline));
 
-    // The `uwtable` attribute according to LLVM is:
-    //
-    // This attribute indicates that the ABI being targeted requires that an
-    // unwind table entry be produced for this function even if we can show
-    // that no exceptions passes by it. This is normally the case for the
-    // ELF x86-64 abi, but it can be disabled for some compilation units.
-    //
-    // Typically when we're compiling with `-C panic=abort` (which implies this
-    // `no_landing_pads` check) we don't need `uwtable` because we can't
-    // generate any exceptions! On Windows, however, exceptions include other
-    // events such as illegal instructions, segfaults, etc. This means that on
-    // Windows we end up still needing the `uwtable` attribute even if the `-C
-    // panic=abort` flag is passed.
-    //
-    // You can also find more info on why Windows always requires uwtables here:
-    // https://bugzilla.mozilla.org/show_bug.cgi?id=1302078
     if cx.sess().must_emit_unwind_tables() {
         to_add.push(uwtable_attr(cx.llcx, cx.sess().opts.unstable_opts.use_sync_unwind));
     }
@@ -491,11 +475,7 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         let allocated_pointer = AttributeKind::AllocatedPointer.create_attr(cx.llcx);
         attributes::apply_to_llfn(llfn, AttributePlace::Argument(0), &[allocated_pointer]);
     }
-    // function alignment can be set globally with the `-Zmin-function-alignment=<n>` flag;
-    // the alignment from a `#[repr(align(<n>))]` is used if it specifies a higher alignment.
-    if let Some(align) =
-        Ord::max(cx.tcx.sess.opts.unstable_opts.min_function_alignment, codegen_fn_attrs.alignment)
-    {
+    if let Some(align) = codegen_fn_attrs.alignment {
         llvm::set_alignment(llfn, align);
     }
     if let Some(backchain) = backchain_attr(cx) {
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index ee46b49a094..9c62244f3c9 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -587,7 +587,7 @@ fn thin_lto(
 }
 
 fn enable_autodiff_settings(ad: &[config::AutoDiff]) {
-    for &val in ad {
+    for val in ad {
         // We intentionally don't use a wildcard, to not forget handling anything new.
         match val {
             config::AutoDiff::PrintPerf => {
@@ -599,6 +599,10 @@ fn enable_autodiff_settings(ad: &[config::AutoDiff]) {
             config::AutoDiff::PrintTA => {
                 llvm::set_print_type(true);
             }
+            config::AutoDiff::PrintTAFn(fun) => {
+                llvm::set_print_type(true); // Enable general type printing
+                llvm::set_print_type_fun(&fun); // Set specific function to analyze
+            }
             config::AutoDiff::Inline => {
                 llvm::set_inline(true);
             }
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 5e9594dd06b..d0aa7320b4b 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1166,11 +1166,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         (self.extract_value(landing_pad, 0), self.extract_value(landing_pad, 1))
     }
 
-    fn filter_landing_pad(&mut self, pers_fn: &'ll Value) -> (&'ll Value, &'ll Value) {
+    fn filter_landing_pad(&mut self, pers_fn: &'ll Value) {
         let ty = self.type_struct(&[self.type_ptr(), self.type_i32()], false);
         let landing_pad = self.landing_pad(ty, pers_fn, 1);
         self.add_clause(landing_pad, self.const_array(self.type_ptr(), &[]));
-        (self.extract_value(landing_pad, 0), self.extract_value(landing_pad, 1))
     }
 
     fn resume(&mut self, exn0: &'ll Value, exn1: &'ll Value) {
diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs
index ae5add59322..92f38565eef 100644
--- a/compiler/rustc_codegen_llvm/src/common.rs
+++ b/compiler/rustc_codegen_llvm/src/common.rs
@@ -175,7 +175,7 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
     }
 
     fn const_usize(&self, i: u64) -> &'ll Value {
-        let bit_size = self.data_layout().pointer_size.bits();
+        let bit_size = self.data_layout().pointer_size().bits();
         if bit_size < 64 {
             // make sure it doesn't overflow
             assert!(i < (1 << bit_size));
@@ -268,7 +268,7 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
                 }
             }
             Scalar::Ptr(ptr, _size) => {
-                let (prov, offset) = ptr.into_parts();
+                let (prov, offset) = ptr.prov_and_relative_offset();
                 let global_alloc = self.tcx.global_alloc(prov.alloc_id());
                 let base_addr = match global_alloc {
                     GlobalAlloc::Memory(alloc) => {
diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs
index a4492d76c3c..28f5282c6b0 100644
--- a/compiler/rustc_codegen_llvm/src/consts.rs
+++ b/compiler/rustc_codegen_llvm/src/consts.rs
@@ -43,7 +43,8 @@ pub(crate) fn const_alloc_to_llvm<'ll>(
     }
     let mut llvals = Vec::with_capacity(alloc.provenance().ptrs().len() + 1);
     let dl = cx.data_layout();
-    let pointer_size = dl.pointer_size.bytes() as usize;
+    let pointer_size = dl.pointer_size();
+    let pointer_size_bytes = pointer_size.bytes() as usize;
 
     // Note: this function may call `inspect_with_uninit_and_ptr_outside_interpreter`, so `range`
     // must be within the bounds of `alloc` and not contain or overlap a pointer provenance.
@@ -100,7 +101,9 @@ pub(crate) fn const_alloc_to_llvm<'ll>(
                 // This `inspect` is okay since it is within the bounds of the allocation, it doesn't
                 // affect interpreter execution (we inspect the result after interpreter execution),
                 // and we properly interpret the provenance as a relocation pointer offset.
-                alloc.inspect_with_uninit_and_ptr_outside_interpreter(offset..(offset + pointer_size)),
+                alloc.inspect_with_uninit_and_ptr_outside_interpreter(
+                    offset..(offset + pointer_size_bytes),
+                ),
             )
             .expect("const_alloc_to_llvm: could not read relocation pointer")
                 as u64;
@@ -111,11 +114,11 @@ pub(crate) fn const_alloc_to_llvm<'ll>(
                 InterpScalar::from_pointer(Pointer::new(prov, Size::from_bytes(ptr_offset)), &cx.tcx),
                 Scalar::Initialized {
                     value: Primitive::Pointer(address_space),
-                    valid_range: WrappingRange::full(dl.pointer_size),
+                    valid_range: WrappingRange::full(pointer_size),
                 },
                 cx.type_ptr_ext(address_space),
             ));
-            next_offset = offset + pointer_size;
+            next_offset = offset + pointer_size_bytes;
         }
         if alloc.len() >= next_offset {
             let range = next_offset..alloc.len();
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 0324dff6ff2..90582e23b04 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -605,7 +605,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
         GenericCx(
             FullCx {
                 tcx,
-                scx: SimpleCx::new(llmod, llcx, tcx.data_layout.pointer_size),
+                scx: SimpleCx::new(llmod, llcx, tcx.data_layout.pointer_size()),
                 use_dll_storage_attrs,
                 tls_model,
                 codegen_unit,
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 7f3e486ca31..9b4736e50e6 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -159,13 +159,15 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
     return_if_di_node_created_in_meantime!(cx, unique_type_id);
 
     let data_layout = &cx.tcx.data_layout;
+    let pointer_size = data_layout.pointer_size();
+    let pointer_align = data_layout.pointer_align();
     let ptr_type_debuginfo_name = compute_debuginfo_type_name(cx.tcx, ptr_type, true);
 
     match wide_pointer_kind(cx, pointee_type) {
         None => {
             // This is a thin pointer. Create a regular pointer type and give it the correct name.
             assert_eq!(
-                (data_layout.pointer_size, data_layout.pointer_align.abi),
+                (pointer_size, pointer_align.abi),
                 cx.size_and_align_of(ptr_type),
                 "ptr_type={ptr_type}, pointee_type={pointee_type}",
             );
@@ -174,8 +176,8 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>(
                 llvm::LLVMRustDIBuilderCreatePointerType(
                     DIB(cx),
                     pointee_type_di_node,
-                    data_layout.pointer_size.bits(),
-                    data_layout.pointer_align.abi.bits() as u32,
+                    pointer_size.bits(),
+                    pointer_align.abi.bits() as u32,
                     0, // Ignore DWARF address space.
                     ptr_type_debuginfo_name.as_c_char_ptr(),
                     ptr_type_debuginfo_name.len(),
@@ -319,7 +321,9 @@ fn build_subroutine_type_di_node<'ll, 'tcx>(
     let name = compute_debuginfo_type_name(cx.tcx, fn_ty, false);
     let (size, align) = match fn_ty.kind() {
         ty::FnDef(..) => (Size::ZERO, Align::ONE),
-        ty::FnPtr(..) => (cx.tcx.data_layout.pointer_size, cx.tcx.data_layout.pointer_align.abi),
+        ty::FnPtr(..) => {
+            (cx.tcx.data_layout.pointer_size(), cx.tcx.data_layout.pointer_align().abi)
+        }
         _ => unreachable!(),
     };
     let di_node = unsafe {
@@ -504,7 +508,7 @@ fn recursion_marker_type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> &'ll D
         create_basic_type(
             cx,
             "<recur_type>",
-            cx.tcx.data_layout.pointer_size,
+            cx.tcx.data_layout.pointer_size(),
             dwarf_const::DW_ATE_unsigned,
         )
     })
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index f7f062849a8..fcc0d378f06 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -103,23 +103,25 @@ fn call_simple_intrinsic<'ll, 'tcx>(
         sym::minnumf64 => ("llvm.minnum", &[bx.type_f64()]),
         sym::minnumf128 => ("llvm.minnum", &[bx.type_f128()]),
 
-        sym::minimumf16 => ("llvm.minimum", &[bx.type_f16()]),
-        sym::minimumf32 => ("llvm.minimum", &[bx.type_f32()]),
-        sym::minimumf64 => ("llvm.minimum", &[bx.type_f64()]),
-        // There are issues on x86_64 and aarch64 with the f128 variant,
-        // let's instead use the instrinsic fallback body.
-        // sym::minimumf128 => ("llvm.minimum", &[cx.type_f128()]),
+        // FIXME: LLVM currently mis-compile those intrinsics, re-enable them
+        // when llvm/llvm-project#{139380,139381,140445} are fixed.
+        //sym::minimumf16 => ("llvm.minimum", &[bx.type_f16()]),
+        //sym::minimumf32 => ("llvm.minimum", &[bx.type_f32()]),
+        //sym::minimumf64 => ("llvm.minimum", &[bx.type_f64()]),
+        //sym::minimumf128 => ("llvm.minimum", &[cx.type_f128()]),
+        //
         sym::maxnumf16 => ("llvm.maxnum", &[bx.type_f16()]),
         sym::maxnumf32 => ("llvm.maxnum", &[bx.type_f32()]),
         sym::maxnumf64 => ("llvm.maxnum", &[bx.type_f64()]),
         sym::maxnumf128 => ("llvm.maxnum", &[bx.type_f128()]),
 
-        sym::maximumf16 => ("llvm.maximum", &[bx.type_f16()]),
-        sym::maximumf32 => ("llvm.maximum", &[bx.type_f32()]),
-        sym::maximumf64 => ("llvm.maximum", &[bx.type_f64()]),
-        // There are issues on x86_64 and aarch64 with the f128 variant,
-        // let's instead use the instrinsic fallback body.
-        // sym::maximumf128 => ("llvm.maximum", &[cx.type_f128()]),
+        // FIXME: LLVM currently mis-compile those intrinsics, re-enable them
+        // when llvm/llvm-project#{139380,139381,140445} are fixed.
+        //sym::maximumf16 => ("llvm.maximum", &[bx.type_f16()]),
+        //sym::maximumf32 => ("llvm.maximum", &[bx.type_f32()]),
+        //sym::maximumf64 => ("llvm.maximum", &[bx.type_f64()]),
+        //sym::maximumf128 => ("llvm.maximum", &[cx.type_f128()]),
+        //
         sym::copysignf16 => ("llvm.copysign", &[bx.type_f16()]),
         sym::copysignf32 => ("llvm.copysign", &[bx.type_f32()]),
         sym::copysignf64 => ("llvm.copysign", &[bx.type_f64()]),
@@ -456,7 +458,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     // For rusty ABIs, small aggregates are actually passed
                     // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`),
                     // so we re-use that same threshold here.
-                    layout.size() <= self.data_layout().pointer_size * 2
+                    layout.size() <= self.data_layout().pointer_size() * 2
                 }
             };
@@ -756,8 +758,8 @@ fn codegen_msvc_try<'ll, 'tcx>(
     //      }
     //
     // More information can be found in libstd's seh.rs implementation.
-    let ptr_size = bx.tcx().data_layout.pointer_size;
-    let ptr_align = bx.tcx().data_layout.pointer_align.abi;
+    let ptr_size = bx.tcx().data_layout.pointer_size();
+    let ptr_align = bx.tcx().data_layout.pointer_align().abi;
     let slot = bx.alloca(ptr_size, ptr_align);
     let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void());
     bx.invoke(try_func_ty, None, None, try_func, &[data], normal, catchswitch, None, None);
@@ -1029,8 +1031,8 @@ fn codegen_emcc_try<'ll, 'tcx>(
 
     // We need to pass two values to catch_func (ptr and is_rust_panic), so
     // create an alloca and pass a pointer to that.
-    let ptr_size = bx.tcx().data_layout.pointer_size;
-    let ptr_align = bx.tcx().data_layout.pointer_align.abi;
+    let ptr_size = bx.tcx().data_layout.pointer_size();
+    let ptr_align = bx.tcx().data_layout.pointer_align().abi;
     let i8_align = bx.tcx().data_layout.i8_align.abi;
     // Required in order for there to be no padding between the fields.
     assert!(i8_align <= ptr_align);
@@ -1156,9 +1158,11 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     macro_rules! require_int_or_uint_ty {
         ($ty: expr, $diag: expr) => {
             match $ty {
-                ty::Int(i) => i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()),
+                ty::Int(i) => {
+                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
+                }
                 ty::Uint(i) => {
-                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                    i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
                 }
                 _ => {
                     return_error!($diag);
@@ -1537,6 +1541,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             sym::simd_fsin => "llvm.sin",
             sym::simd_fsqrt => "llvm.sqrt",
             sym::simd_round => "llvm.round",
+            sym::simd_round_ties_even => "llvm.rint",
             sym::simd_trunc => "llvm.trunc",
             _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }),
         };
@@ -1563,6 +1568,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             | sym::simd_fsqrt
             | sym::simd_relaxed_fma
             | sym::simd_round
+            | sym::simd_round_ties_even
             | sym::simd_trunc
     ) {
         return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
@@ -2010,10 +2016,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     } else {
         let bitwidth = match in_elem.kind() {
             ty::Int(i) => {
-                i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
             }
             ty::Uint(i) => {
-                i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits())
+                i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits())
             }
             _ => return_error!(InvalidMonomorphization::UnsupportedSymbol {
                 span,
@@ -2309,7 +2315,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
     // Unary integer intrinsics
     if matches!(
         name,
-        sym::simd_bswap | sym::simd_bitreverse | sym::simd_ctlz | sym::simd_ctpop | sym::simd_cttz
+        sym::simd_bswap
+            | sym::simd_bitreverse
+            | sym::simd_ctlz
+            | sym::simd_ctpop
+            | sym::simd_cttz
+            | sym::simd_funnel_shl
+            | sym::simd_funnel_shr
     ) {
         let vec_ty = bx.cx.type_vector(
             match *in_elem.kind() {
@@ -2330,6 +2342,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
             sym::simd_ctlz => "llvm.ctlz",
             sym::simd_ctpop => "llvm.ctpop",
             sym::simd_cttz => "llvm.cttz",
+            sym::simd_funnel_shl => "llvm.fshl",
+            sym::simd_funnel_shr => "llvm.fshr",
             _ => unreachable!(),
         };
         let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();
@@ -2350,6 +2364,11 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
                 // simple unary argument cases
                 Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[args[0].immediate()]))
             }
+            sym::simd_funnel_shl | sym::simd_funnel_shr => Ok(bx.call_intrinsic(
+                llvm_intrinsic,
+                &[vec_ty],
+                &[args[0].immediate(), args[1].immediate(), args[2].immediate()],
+            )),
             _ => unreachable!(),
         };
     }
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index cdfffbe47bf..63ca51b006d 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -113,7 +113,7 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
     ) -> ModuleLlvm {
         let module_llvm = ModuleLlvm::new_metadata(tcx, module_name);
         let cx =
-            SimpleCx::new(module_llvm.llmod(), &module_llvm.llcx, tcx.data_layout.pointer_size);
+            SimpleCx::new(module_llvm.llmod(), &module_llvm.llcx, tcx.data_layout.pointer_size());
         unsafe {
             allocator::codegen(tcx, cx, module_name, kind, alloc_error_handler_kind);
         }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index 2ad39fc8538..c696b8d8ff2 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -1,4 +1,3 @@
-#![allow(non_camel_case_types)]
 #![expect(dead_code)]
 
 use libc::{c_char, c_uint};
@@ -40,7 +39,7 @@ unsafe extern "C" {
     pub(crate) fn LLVMDumpValue(V: &Value);
     pub(crate) fn LLVMGetFunctionCallConv(F: &Value) -> c_uint;
    pub(crate) fn LLVMGetReturnType(T: &Type) -> &Type;
-    pub(crate) fn LLVMGetParams(Fnc: &Value, parms: *mut &Value);
+    pub(crate) fn LLVMGetParams(Fnc: &Value, params: *mut &Value);
     pub(crate) fn LLVMGetNamedFunction(M: &Module, Name: *const c_char) -> Option<&Value>;
 }
 
@@ -57,14 +56,19 @@ pub(crate) use self::Enzyme_AD::*;
 
 #[cfg(llvm_enzyme)]
 pub(crate) mod Enzyme_AD {
+    use std::ffi::{CString, c_char};
+
     use libc::c_void;
+
     unsafe extern "C" {
         pub(crate) fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8);
+        pub(crate) fn EnzymeSetCLString(arg1: *mut ::std::os::raw::c_void, arg2: *const c_char);
     }
     unsafe extern "C" {
         static mut EnzymePrintPerf: c_void;
         static mut EnzymePrintActivity: c_void;
         static mut EnzymePrintType: c_void;
+        static mut EnzymeFunctionToAnalyze: c_void;
         static mut EnzymePrint: c_void;
         static mut EnzymeStrictAliasing: c_void;
         static mut looseTypeAnalysis: c_void;
@@ -86,6 +90,15 @@ pub(crate) mod Enzyme_AD {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintType), print as u8);
         }
     }
+    pub(crate) fn set_print_type_fun(fun_name: &str) {
+        let c_fun_name = CString::new(fun_name).unwrap();
+        unsafe {
+            EnzymeSetCLString(
+                std::ptr::addr_of_mut!(EnzymeFunctionToAnalyze),
+                c_fun_name.as_ptr() as *const c_char,
+            );
+        }
+    }
     pub(crate) fn set_print(print: bool) {
         unsafe {
             EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrint), print as u8);
@@ -132,6 +145,9 @@ pub(crate) mod Fallback_AD {
     pub(crate) fn set_print_type(print: bool) {
         unimplemented!()
     }
+    pub(crate) fn set_print_type_fun(fun_name: &str) {
+        unimplemented!()
+    }
     pub(crate) fn set_print(print: bool) {
         unimplemented!()
     }
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 6fd07d562af..0fb987bdf82 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -370,10 +370,18 @@ fn update_target_reliable_float_cfg(sess: &Session, cfg: &mut TargetConfig) {
     let target_env = sess.target.options.env.as_ref();
     let target_abi = sess.target.options.abi.as_ref();
     let target_pointer_width = sess.target.pointer_width;
+    let version = get_version();
 
     cfg.has_reliable_f16 = match (target_arch, target_os) {
         // Selection failure <https://github.com/llvm/llvm-project/issues/50374>
("s390x", _) => false, + // LLVM crash without neon <https://github.com/llvm/llvm-project/issues/129394> (now fixed) + ("aarch64", _) + if !cfg.target_features.iter().any(|f| f.as_str() == "neon") + && version < (20, 1, 1) => + { + false + } // Unsupported <https://github.com/llvm/llvm-project/issues/94434> ("arm64ec", _) => false, // MinGW ABI bugs <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054> diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs index 453eca2bbe1..ee472e75ed4 100644 --- a/compiler/rustc_codegen_llvm/src/type_.rs +++ b/compiler/rustc_codegen_llvm/src/type_.rs @@ -208,7 +208,7 @@ impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> { } fn type_ptr(&self) -> &'ll Type { - self.type_ptr_ext(AddressSpace::DATA) + self.type_ptr_ext(AddressSpace::ZERO) } fn type_ptr_ext(&self, address_space: AddressSpace) -> &'ll Type { @@ -258,7 +258,7 @@ impl Type { } pub(crate) fn ptr_llcx(llcx: &llvm::Context) -> &Type { - unsafe { llvm::LLVMPointerTypeInContext(llcx, AddressSpace::DATA.0) } + unsafe { llvm::LLVMPointerTypeInContext(llcx, AddressSpace::ZERO.0) } } } diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs index 236568590be..ce079f3cb0a 100644 --- a/compiler/rustc_codegen_llvm/src/va_arg.rs +++ b/compiler/rustc_codegen_llvm/src/va_arg.rs @@ -45,7 +45,8 @@ fn emit_direct_ptr_va_arg<'ll, 'tcx>( let va_list_ty = bx.type_ptr(); let va_list_addr = list.immediate(); - let ptr = bx.load(va_list_ty, va_list_addr, bx.tcx().data_layout.pointer_align.abi); + let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi; + let ptr = bx.load(va_list_ty, va_list_addr, ptr_align_abi); let (addr, addr_align) = if allow_higher_align && align > slot_size { (round_pointer_up_to_alignment(bx, ptr, align, bx.type_ptr()), align) @@ -56,7 +57,7 @@ fn emit_direct_ptr_va_arg<'ll, 'tcx>( let aligned_size = size.align_to(slot_size).bytes() as i32; let full_direct_size = bx.cx().const_i32(aligned_size); let next = bx.inbounds_ptradd(addr, full_direct_size); - bx.store(next, va_list_addr, bx.tcx().data_layout.pointer_align.abi); + bx.store(next, va_list_addr, ptr_align_abi); if size.bytes() < slot_size.bytes() && bx.tcx().sess.target.endian == Endian::Big @@ -108,8 +109,8 @@ fn emit_ptr_va_arg<'ll, 'tcx>( let (llty, size, align) = if indirect { ( bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx), - bx.cx.data_layout().pointer_size, - bx.cx.data_layout().pointer_align, + bx.cx.data_layout().pointer_size(), + bx.cx.data_layout().pointer_align(), ) } else { (layout.llvm_type(bx.cx), layout.size, layout.align) @@ -172,10 +173,10 @@ fn emit_aapcs_va_arg<'ll, 'tcx>( let gr_type = target_ty.is_any_ptr() || target_ty.is_integral(); let (reg_off, reg_top, slot_size) = if gr_type { - let nreg = (layout.size.bytes() + 7) / 8; + let nreg = layout.size.bytes().div_ceil(8); (gr_offs, gr_top, nreg * 8) } else { - let nreg = (layout.size.bytes() + 15) / 16; + let nreg = layout.size.bytes().div_ceil(16); (vr_offs, vr_top, nreg * 16) }; @@ -204,7 +205,7 @@ fn emit_aapcs_va_arg<'ll, 'tcx>( bx.switch_to_block(in_reg); let top_type = bx.type_ptr(); - let top = bx.load(top_type, reg_top, dl.pointer_align.abi); + let top = bx.load(top_type, reg_top, dl.pointer_align().abi); // reg_value = *(@top + reg_off_v); let mut reg_addr = bx.ptradd(top, reg_off_v); @@ -297,6 +298,7 @@ fn emit_powerpc_va_arg<'ll, 'tcx>( let max_regs = 8u8; let use_regs = bx.icmp(IntPredicate::IntULT, num_regs, 
+    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;
 
     let in_reg = bx.append_sibling_block("va_arg.in_reg");
     let in_mem = bx.append_sibling_block("va_arg.in_mem");
@@ -308,7 +310,7 @@ fn emit_powerpc_va_arg<'ll, 'tcx>(
     bx.switch_to_block(in_reg);
 
     let reg_safe_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2 + 4));
-    let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, dl.pointer_align.abi);
+    let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, ptr_align_abi);
 
     // Floating-point registers start after the general-purpose registers.
     if !is_int && !is_soft_float_abi {
@@ -342,11 +344,11 @@ fn emit_powerpc_va_arg<'ll, 'tcx>(
     let size = if !is_indirect {
         layout.layout.size.align_to(overflow_area_align)
     } else {
-        dl.pointer_size
+        dl.pointer_size()
     };
 
     let overflow_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2));
-    let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, dl.pointer_align.abi);
+    let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, ptr_align_abi);
 
     // Round up address of argument to alignment
     if layout.layout.align.abi > overflow_area_align {
@@ -362,7 +364,7 @@ fn emit_powerpc_va_arg<'ll, 'tcx>(
 
     // Increase the overflow area.
     overflow_area = bx.inbounds_ptradd(overflow_area, bx.const_usize(size.bytes()));
-    bx.store(overflow_area, overflow_area_ptr, dl.pointer_align.abi);
+    bx.store(overflow_area, overflow_area_ptr, ptr_align_abi);
 
     bx.br(end);
 
@@ -373,11 +375,8 @@ fn emit_powerpc_va_arg<'ll, 'tcx>(
     bx.switch_to_block(end);
     let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
     let val_type = layout.llvm_type(bx);
-    let val_addr = if is_indirect {
-        bx.load(bx.cx.type_ptr(), val_addr, dl.pointer_align.abi)
-    } else {
-        val_addr
-    };
+    let val_addr =
+        if is_indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
     bx.load(val_type, val_addr, layout.align.abi)
 }
 
@@ -414,6 +413,7 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     let in_reg = bx.append_sibling_block("va_arg.in_reg");
     let in_mem = bx.append_sibling_block("va_arg.in_mem");
     let end = bx.append_sibling_block("va_arg.end");
+    let ptr_align_abi = dl.pointer_align().abi;
 
     // FIXME: vector ABI not yet supported.
     let target_ty_size = bx.cx.size_of(target_ty).bytes();
@@ -435,7 +435,7 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.switch_to_block(in_reg);
 
     // Work out the address of the value in the register save area.
-    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, dl.pointer_align.abi);
+    let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, ptr_align_abi);
     let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
     let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
     let reg_addr = bx.ptradd(reg_ptr_v, reg_off);
@@ -449,15 +449,14 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.switch_to_block(in_mem);
 
     // Work out the address of the value in the argument overflow area.
-    let arg_ptr_v =
-        bx.load(bx.type_ptr(), overflow_arg_area, bx.tcx().data_layout.pointer_align.abi);
+    let arg_ptr_v = bx.load(bx.type_ptr(), overflow_arg_area, ptr_align_abi);
     let arg_off = bx.const_u64(padding);
     let mem_addr = bx.ptradd(arg_ptr_v, arg_off);
 
     // Update the argument overflow area pointer.
     let arg_size = bx.cx().const_u64(padded_size);
     let new_arg_ptr_v = bx.inbounds_ptradd(arg_ptr_v, arg_size);
-    bx.store(new_arg_ptr_v, overflow_arg_area, dl.pointer_align.abi);
+    bx.store(new_arg_ptr_v, overflow_arg_area, ptr_align_abi);
     bx.br(end);
 
     // Return the appropriate result.
     bx.switch_to_block(end);
     let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
     let val_type = layout.llvm_type(bx);
     let val_addr =
-        if indirect { bx.load(bx.cx.type_ptr(), val_addr, dl.pointer_align.abi) } else { val_addr };
+        if indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr };
     bx.load(val_type, val_addr, layout.align.abi)
 }
 
@@ -607,7 +606,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
     // loads than necessary. Can we clean this up?
     let reg_save_area_ptr =
         bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
-    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);
+    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align().abi);
 
     let reg_addr = match layout.layout.backend_repr() {
         BackendRepr::Scalar(scalar) => match scalar.primitive() {
@@ -749,10 +748,11 @@ fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
     layout: TyAndLayout<'tcx, Ty<'tcx>>,
 ) -> &'ll Value {
     let dl = bx.cx.data_layout();
+    let ptr_align_abi = dl.data_layout().pointer_align().abi;
 
     let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));
 
-    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
+    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, ptr_align_abi);
     // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
     // byte boundary if alignment needed by type exceeds 8 byte boundary.
     // It isn't stated explicitly in the standard, but in practice we use
@@ -771,7 +771,7 @@ fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
     let size_in_bytes = layout.layout.size().bytes();
     let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
     let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
-    bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);
+    bx.store(overflow_arg_area, overflow_arg_area_ptr, ptr_align_abi);
 
     mem_addr
 }
 
@@ -803,6 +803,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     let from_stack = bx.append_sibling_block("va_arg.from_stack");
     let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
     let end = bx.append_sibling_block("va_arg.end");
+    let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi;
 
     // (*va).va_ndx
     let va_reg_offset = 4;
@@ -825,12 +826,11 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
 
     bx.switch_to_block(from_regsave);
     // update va_ndx
-    bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
+    bx.store(offset_next, offset_ptr, ptr_align_abi);
 
     // (*va).va_reg
     let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
-    let regsave_area =
-        bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let regsave_area = bx.load(bx.type_ptr(), regsave_area_ptr, ptr_align_abi);
     let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
     bx.br(end);
 
@@ -849,11 +849,11 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     // va_ndx = offset_next_corrected;
     let offset_next_corrected = bx.add(offset_next, bx.const_i32(slot_size));
     // update va_ndx
-    bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
+    bx.store(offset_next_corrected, offset_ptr, ptr_align_abi);
 
     // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
     let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
-    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, ptr_align_abi);
     let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
     bx.br(end);
 
@@ -861,7 +861,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     // On big-endian, for values smaller than the slot size we'd have to align the read to the end
     // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
-    // targets supported by rustc are litte-endian so don't worry about it.
+    // targets supported by rustc are little-endian so don't worry about it.
     //
     // if from_regsave {
     //     unsafe { *regsave_value_ptr }