25 files changed, 340 insertions, 108 deletions
diff --git a/tests/codegen/align-fn.rs b/tests/codegen/align-fn.rs
index 97f23cc0423..660d8cd2bbf 100644
--- a/tests/codegen/align-fn.rs
+++ b/tests/codegen/align-fn.rs
@@ -47,3 +47,22 @@ impl T for () {}
 pub fn foo() {
     ().trait_method();
 }
+
+// CHECK-LABEL: align_specified_twice_1
+// CHECK-SAME: align 64
+#[no_mangle]
+#[repr(align(32), align(64))]
+pub fn align_specified_twice_1() {}
+
+// CHECK-LABEL: align_specified_twice_2
+// CHECK-SAME: align 128
+#[no_mangle]
+#[repr(align(128), align(32))]
+pub fn align_specified_twice_2() {}
+
+// CHECK-LABEL: align_specified_twice_3
+// CHECK-SAME: align 256
+#[no_mangle]
+#[repr(align(32))]
+#[repr(align(256))]
+pub fn align_specified_twice_3() {}
diff --git a/tests/codegen/align-struct.rs b/tests/codegen/align-struct.rs
index cc65b08a922..402a184d4c0 100644
--- a/tests/codegen/align-struct.rs
+++ b/tests/codegen/align-struct.rs
@@ -1,5 +1,7 @@
 //@ compile-flags: -C no-prepopulate-passes -Z mir-opt-level=0
-//
+// 32bit MSVC does not align things properly so we suppress high alignment annotations (#112480)
+//@ ignore-i686-pc-windows-msvc
+//@ ignore-i686-pc-windows-gnu
 
 #![crate_type = "lib"]
 
diff --git a/tests/codegen/autodiff/identical_fnc.rs b/tests/codegen/autodiff/identical_fnc.rs
new file mode 100644
index 00000000000..1c3277f52b4
--- /dev/null
+++ b/tests/codegen/autodiff/identical_fnc.rs
@@ -0,0 +1,45 @@
+//@ compile-flags: -Zautodiff=Enable -C opt-level=3  -Clto=fat
+//@ no-prefer-dynamic
+//@ needs-enzyme
+//
+// Each autodiff invocation creates a new placeholder function, which we will replace on llvm-ir
+// level. If a user tries to differentiate two identical functions within the same compilation unit,
+// then LLVM might merge them in release mode before AD. In that case we can't rewrite one of the
+// merged placeholder function anymore, and compilation would fail. We prevent this by disabling
+// LLVM's merge_function pass before AD. Here we implicetely test that our solution keeps working.
+// We also explicetly test that we keep running merge_function after AD, by checking for two
+// identical function calls in the LLVM-IR, while having two different calls in the Rust code.
+#![feature(autodiff)]
+
+use std::autodiff::autodiff;
+
+#[autodiff(d_square, Reverse, Duplicated, Active)]
+fn square(x: &f64) -> f64 {
+    x * x
+}
+
+#[autodiff(d_square2, Reverse, Duplicated, Active)]
+fn square2(x: &f64) -> f64 {
+    x * x
+}
+
+// CHECK:; identical_fnc::main
+// CHECK-NEXT:; Function Attrs:
+// CHECK-NEXT:define internal void @_ZN13identical_fnc4main17hf4dbc69c8d2f9130E()
+// CHECK-NEXT:start:
+// CHECK-NOT:br
+// CHECK-NOT:ret
+// CHECK:; call identical_fnc::d_square
+// CHECK-NEXT:  call fastcc void @_ZN13identical_fnc8d_square17h4c364207a2f8e06dE(double %x.val, ptr noalias noundef nonnull align 8 dereferenceable(8) %dx1)
+// CHECK-NEXT:; call identical_fnc::d_square
+// CHECK-NEXT:  call fastcc void @_ZN13identical_fnc8d_square17h4c364207a2f8e06dE(double %x.val, ptr noalias noundef nonnull align 8 dereferenceable(8) %dx2)
+
+fn main() {
+    let x = std::hint::black_box(3.0);
+    let mut dx1 = std::hint::black_box(1.0);
+    let mut dx2 = std::hint::black_box(1.0);
+    let _ = d_square(&x, &mut dx1, 1.0);
+    let _ = d_square2(&x, &mut dx2, 1.0);
+    assert_eq!(dx1, 6.0);
+    assert_eq!(dx2, 6.0);
+}
diff --git a/tests/codegen/autodiffv2.rs b/tests/codegen/autodiffv2.rs
new file mode 100644
index 00000000000..a40d19d3be3
--- /dev/null
+++ b/tests/codegen/autodiffv2.rs
@@ -0,0 +1,113 @@
+//@ compile-flags: -Zautodiff=Enable -C opt-level=3  -Clto=fat
+//@ no-prefer-dynamic
+//@ needs-enzyme
+//
+// In Enzyme, we test against a large range of LLVM versions (5+) and don't have overly many
+// breakages. One benefit is that we match the IR generated by Enzyme only after running it
+// through LLVM's O3 pipeline, which will remove most of the noise.
+// However, our integration test could also be affected by changes in how rustc lowers MIR into
+// LLVM-IR, which could cause additional noise and thus breakages. If that's the case, we should
+// reduce this test to only match the first lines and the ret instructions.
+//
+// The function tested here has 4 inputs and 5 outputs, so we could either call forward-mode
+// autodiff 4 times, or reverse mode 5 times. Since a forward-mode call is usually faster than
+// reverse mode, we prefer it here. This file also tests a new optimization (batch mode), which
+// allows us to call forward-mode autodiff only once, and get all 5 outputs in a single call.
+//
+// We support 2 different batch modes. `d_square2` has the same interface as scalar forward-mode,
+// but each shadow argument is `width` times larger (thus 16 and 20 elements here).
+// `d_square3` instead takes `width` (4) shadow arguments, which are all the same size as the
+// original function arguments.
+//
+// FIXME(autodiff): We currently can't test `d_square1` and `d_square3` in the same file, since they
+// generate the same dummy functions which get merged by LLVM, breaking pieces of our pipeline which
+// try to rewrite the dummy functions later. We should consider to change to pure declarations both
+// in our frontend and in the llvm backend to avoid these issues.
+
+#![feature(autodiff)]
+
+use std::autodiff::autodiff;
+
+#[no_mangle]
+//#[autodiff(d_square1, Forward, Dual, Dual)]
+#[autodiff(d_square2, Forward, 4, Dualv, Dualv)]
+#[autodiff(d_square3, Forward, 4, Dual, Dual)]
+fn square(x: &[f32], y: &mut [f32]) {
+    assert!(x.len() >= 4);
+    assert!(y.len() >= 5);
+    y[0] = 4.3 * x[0] + 1.2 * x[1] + 3.4 * x[2] + 2.1 * x[3];
+    y[1] = 2.3 * x[0] + 4.5 * x[1] + 1.7 * x[2] + 6.4 * x[3];
+    y[2] = 1.1 * x[0] + 3.3 * x[1] + 2.5 * x[2] + 4.7 * x[3];
+    y[3] = 5.2 * x[0] + 1.4 * x[1] + 2.6 * x[2] + 3.8 * x[3];
+    y[4] = 1.0 * x[0] + 2.0 * x[1] + 3.0 * x[2] + 4.0 * x[3];
+}
+
+fn main() {
+    let x1 = std::hint::black_box(vec![0.0, 1.0, 2.0, 3.0]);
+
+    let dx1 = std::hint::black_box(vec![1.0; 12]);
+
+    let z1 = std::hint::black_box(vec![1.0, 0.0, 0.0, 0.0]);
+    let z2 = std::hint::black_box(vec![0.0, 1.0, 0.0, 0.0]);
+    let z3 = std::hint::black_box(vec![0.0, 0.0, 1.0, 0.0]);
+    let z4 = std::hint::black_box(vec![0.0, 0.0, 0.0, 1.0]);
+
+    let z5 = std::hint::black_box(vec![
+        1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0,
+    ]);
+
+    let mut y1 = std::hint::black_box(vec![0.0; 5]);
+    let mut y2 = std::hint::black_box(vec![0.0; 5]);
+    let mut y3 = std::hint::black_box(vec![0.0; 5]);
+    let mut y4 = std::hint::black_box(vec![0.0; 5]);
+
+    let mut y5 = std::hint::black_box(vec![0.0; 5]);
+
+    let mut y6 = std::hint::black_box(vec![0.0; 5]);
+
+    let mut dy1_1 = std::hint::black_box(vec![0.0; 5]);
+    let mut dy1_2 = std::hint::black_box(vec![0.0; 5]);
+    let mut dy1_3 = std::hint::black_box(vec![0.0; 5]);
+    let mut dy1_4 = std::hint::black_box(vec![0.0; 5]);
+
+    let mut dy2 = std::hint::black_box(vec![0.0; 20]);
+
+    let mut dy3_1 = std::hint::black_box(vec![0.0; 5]);
+    let mut dy3_2 = std::hint::black_box(vec![0.0; 5]);
+    let mut dy3_3 = std::hint::black_box(vec![0.0; 5]);
+    let mut dy3_4 = std::hint::black_box(vec![0.0; 5]);
+
+    // scalar.
+    //d_square1(&x1, &z1, &mut y1, &mut dy1_1);
+    //d_square1(&x1, &z2, &mut y2, &mut dy1_2);
+    //d_square1(&x1, &z3, &mut y3, &mut dy1_3);
+    //d_square1(&x1, &z4, &mut y4, &mut dy1_4);
+
+    // assert y1 == y2 == y3 == y4
+    //for i in 0..5 {
+    //    assert_eq!(y1[i], y2[i]);
+    //    assert_eq!(y1[i], y3[i]);
+    //    assert_eq!(y1[i], y4[i]);
+    //}
+
+    // batch mode A)
+    d_square2(&x1, &z5, &mut y5, &mut dy2);
+
+    // assert y1 == y2 == y3 == y4 == y5
+    //for i in 0..5 {
+    //    assert_eq!(y1[i], y5[i]);
+    //}
+
+    // batch mode B)
+    d_square3(&x1, &z1, &z2, &z3, &z4, &mut y6, &mut dy3_1, &mut dy3_2, &mut dy3_3, &mut dy3_4);
+    for i in 0..5 {
+        assert_eq!(y5[i], y6[i]);
+    }
+
+    for i in 0..5 {
+        assert_eq!(dy2[0..5][i], dy3_1[i]);
+        assert_eq!(dy2[5..10][i], dy3_2[i]);
+        assert_eq!(dy2[10..15][i], dy3_3[i]);
+        assert_eq!(dy2[15..20][i], dy3_4[i]);
+    }
+}
diff --git a/tests/codegen/cffi/c-variadic-naked.rs b/tests/codegen/cffi/c-variadic-naked.rs
index 24b69c5f59e..5843628b633 100644
--- a/tests/codegen/cffi/c-variadic-naked.rs
+++ b/tests/codegen/cffi/c-variadic-naked.rs
@@ -5,14 +5,11 @@
 
 #![crate_type = "lib"]
 #![feature(c_variadic)]
-#![feature(naked_functions)]
 #![no_std]
 
-#[naked]
+#[unsafe(naked)]
 pub unsafe extern "C" fn c_variadic(_: usize, _: ...) {
     // CHECK-NOT: va_start
     // CHECK-NOT: alloca
-    core::arch::naked_asm! {
-        "ret",
-    }
+    core::arch::naked_asm!("ret")
 }
diff --git a/tests/codegen/const-vector.rs b/tests/codegen/const-vector.rs
index 8343594e5d2..1d4edc39b1c 100644
--- a/tests/codegen/const-vector.rs
+++ b/tests/codegen/const-vector.rs
@@ -8,6 +8,7 @@
 #![feature(repr_simd)]
 #![feature(rustc_attrs)]
 #![feature(simd_ffi)]
+#![feature(arm_target_feature)]
 #![allow(non_camel_case_types)]
 
 // Setting up structs that can be used as const vectors
@@ -28,33 +29,12 @@ pub struct Simd<T, const N: usize>([T; N]);
 
 extern "unadjusted" {
     fn test_i8x2(a: i8x2);
-}
-
-extern "unadjusted" {
     fn test_i8x2_two_args(a: i8x2, b: i8x2);
-}
-
-extern "unadjusted" {
     fn test_i8x2_mixed_args(a: i8x2, c: i32, b: i8x2);
-}
-
-extern "unadjusted" {
     fn test_i8x2_arr(a: i8x2);
-}
-
-extern "unadjusted" {
     fn test_f32x2(a: f32x2);
-}
-
-extern "unadjusted" {
     fn test_f32x2_arr(a: f32x2);
-}
-
-extern "unadjusted" {
     fn test_simd(a: Simd<i32, 4>);
-}
-
-extern "unadjusted" {
     fn test_simd_unaligned(a: Simd<i32, 3>);
 }
 
@@ -62,6 +42,9 @@ extern "unadjusted" {
 // if the size is not a power of 2
 // CHECK: %"Simd<i32, 3>" = type { [3 x i32] }
 
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 pub fn do_call() {
     unsafe {
         // CHECK: call void @test_i8x2(<2 x i8> <i8 32, i8 64>
diff --git a/tests/codegen/enum/enum-match.rs b/tests/codegen/enum/enum-match.rs
index 6e185cf8932..6da6ad1f078 100644
--- a/tests/codegen/enum/enum-match.rs
+++ b/tests/codegen/enum/enum-match.rs
@@ -15,7 +15,7 @@ pub enum Enum0 {
     B,
 }
 
-// CHECK-LABEL: define noundef{{( range\(i8 [0-9]+, [0-9]+\))?}} i8 @match0(i8{{.+}}%0)
+// CHECK-LABEL: define{{( dso_local)?}} noundef{{( range\(i8 [0-9]+, [0-9]+\))?}} i8 @match0(i8{{.+}}%0)
 // CHECK-NEXT: start:
 // CHECK-NEXT: %[[IS_B:.+]] = icmp eq i8 %0, 2
 // CHECK-NEXT: %[[TRUNC:.+]] = and i8 %0, 1
@@ -37,7 +37,7 @@ pub enum Enum1 {
     C,
 }
 
-// CHECK-LABEL: define noundef{{( range\(i8 [0-9]+, [0-9]+\))?}} i8 @match1(i8{{.+}}%0)
+// CHECK-LABEL: define{{( dso_local)?}} noundef{{( range\(i8 [0-9]+, [0-9]+\))?}} i8 @match1(i8{{.+}}%0)
 // CHECK-NEXT: start:
 // CHECK-NEXT: %[[REL_VAR:.+]] = add{{( nsw)?}} i8 %0, -2
 // CHECK-NEXT: %[[REL_VAR_WIDE:.+]] = zext i8 %[[REL_VAR]] to i64
@@ -98,7 +98,7 @@ pub enum Enum2 {
     E,
 }
 
-// CHECK-LABEL: define noundef{{( range\(i8 [0-9]+, [0-9]+\))?}} i8 @match2(i8{{.+}}%0)
+// CHECK-LABEL: define{{( dso_local)?}} noundef{{( range\(i8 [0-9]+, [0-9]+\))?}} i8 @match2(i8{{.+}}%0)
 // CHECK-NEXT: start:
 // CHECK-NEXT: %[[REL_VAR:.+]] = add i8 %0, 2
 // CHECK-NEXT: %[[REL_VAR_WIDE:.+]] = zext i8 %[[REL_VAR]] to i64
@@ -121,7 +121,7 @@ pub fn match2(e: Enum2) -> u8 {
 // And make sure it works even if the niched scalar is a pointer.
 // (For example, that we don't try to `sub` on pointers.)
 
-// CHECK-LABEL: define noundef{{( range\(i16 -?[0-9]+, -?[0-9]+\))?}} i16 @match3(ptr{{.+}}%0)
+// CHECK-LABEL: define{{( dso_local)?}} noundef{{( range\(i16 -?[0-9]+, -?[0-9]+\))?}} i16 @match3(ptr{{.+}}%0)
 // CHECK-NEXT: start:
 // CHECK-NEXT: %[[IS_NULL:.+]] = icmp eq ptr %0, null
 // CHECK-NEXT: br i1 %[[IS_NULL]]
@@ -145,7 +145,7 @@ pub enum MiddleNiche {
     E,
 }
 
-// CHECK-LABEL: define noundef{{( range\(i8 -?[0-9]+, -?[0-9]+\))?}} i8 @match4(i8{{.+}}%0)
+// CHECK-LABEL: define{{( dso_local)?}} noundef{{( range\(i8 -?[0-9]+, -?[0-9]+\))?}} i8 @match4(i8{{.+}}%0)
 // CHECK-NEXT: start:
 // CHECK-NEXT: %[[REL_VAR:.+]] = add{{( nsw)?}} i8 %0, -2
 // CHECK-NEXT: %[[IS_NICHE:.+]] = icmp ult i8 %[[REL_VAR]], 5
@@ -449,7 +449,7 @@ pub enum HugeVariantIndex {
     Possible259,
 }
 
-// CHECK-LABEL: define noundef{{( range\(i8 [0-9]+, [0-9]+\))?}} i8 @match5(i8{{.+}}%0)
+// CHECK-LABEL: define{{( dso_local)?}} noundef{{( range\(i8 [0-9]+, [0-9]+\))?}} i8 @match5(i8{{.+}}%0)
 // CHECK-NEXT: start:
 // CHECK-NEXT: %[[REL_VAR:.+]] = add{{( nsw)?}} i8 %0, -2
 // CHECK-NEXT: %[[REL_VAR_WIDE:.+]] = zext i8 %[[REL_VAR]] to i64
diff --git a/tests/codegen/intrinsic-no-unnamed-attr.rs b/tests/codegen/intrinsic-no-unnamed-attr.rs
index 35eb025ab6b..4bec579831d 100644
--- a/tests/codegen/intrinsic-no-unnamed-attr.rs
+++ b/tests/codegen/intrinsic-no-unnamed-attr.rs
@@ -1,9 +1,8 @@
 //@ compile-flags: -C no-prepopulate-passes
 
-#![feature(intrinsics)]
+#![feature(core_intrinsics)]
 
-#[rustc_intrinsic]
-unsafe fn sqrtf32(x: f32) -> f32;
+use std::intrinsics::sqrtf32;
 
 // CHECK: @llvm.sqrt.f32(float) #{{[0-9]*}}
 
diff --git a/tests/codegen/issues/issue-101082.rs b/tests/codegen/issues/issue-101082.rs
index 96cdff64dda..8d15921ddb4 100644
--- a/tests/codegen/issues/issue-101082.rs
+++ b/tests/codegen/issues/issue-101082.rs
@@ -12,6 +12,7 @@
 // at the time still sometimes fails, so only verify it for the power-of-two size
 // - https://github.com/llvm/llvm-project/issues/134735
 //@[x86-64-v3] only-x86_64
+//@[x86-64-v3] min-llvm-version: 21
 //@[x86-64-v3] compile-flags: -Ctarget-cpu=x86-64-v3
 
 #![crate_type = "lib"]
@@ -19,16 +20,7 @@
 #[no_mangle]
 pub fn test() -> usize {
     // CHECK-LABEL: @test(
-    // host: ret {{i64|i32}} 165
-    // x86-64: ret {{i64|i32}} 165
-
-    // FIXME: Now that this autovectorizes via a masked load, it doesn't actually
-    // const-fold for certain widths.  The `test_eight` case below shows that, yes,
-    // what we're emitting *can* be const-folded, except that the way LLVM does it
-    // for certain widths doesn't today.  We should be able to put this back to
-    // the same check after <https://github.com/llvm/llvm-project/issues/134513>
-    // x86-64-v3: masked.load
-
+    // CHECK: ret {{i64|i32}} 165
     let values = [23, 16, 54, 3, 60, 9];
     let mut acc = 0;
     for item in values {
diff --git a/tests/codegen/issues/issue-56927.rs b/tests/codegen/issues/issue-56927.rs
index a40718689b3..415ef073e03 100644
--- a/tests/codegen/issues/issue-56927.rs
+++ b/tests/codegen/issues/issue-56927.rs
@@ -1,4 +1,7 @@
 //@ compile-flags: -C no-prepopulate-passes
+// 32bit MSVC does not align things properly so we suppress high alignment annotations (#112480)
+//@ ignore-i686-pc-windows-msvc
+//@ ignore-i686-pc-windows-gnu
 
 #![crate_type = "rlib"]
 
diff --git a/tests/codegen/naked-asan.rs b/tests/codegen/naked-asan.rs
index 8efedab6ea5..223c41b15bb 100644
--- a/tests/codegen/naked-asan.rs
+++ b/tests/codegen/naked-asan.rs
@@ -6,17 +6,17 @@
 
 #![crate_type = "lib"]
 #![no_std]
-#![feature(abi_x86_interrupt, naked_functions)]
+#![feature(abi_x86_interrupt)]
 
 pub fn caller() {
     page_fault_handler(1, 2);
 }
 
 // CHECK: declare x86_intrcc void @page_fault_handler(ptr {{.*}}, i64{{.*}}){{.*}}#[[ATTRS:[0-9]+]]
-#[naked]
+#[unsafe(naked)]
 #[no_mangle]
 pub extern "x86-interrupt" fn page_fault_handler(_: u64, _: u64) {
-    unsafe { core::arch::naked_asm!("ud2") };
+    core::arch::naked_asm!("ud2")
 }
 
 // CHECK: #[[ATTRS]] =
diff --git a/tests/codegen/naked-fn/aligned.rs b/tests/codegen/naked-fn/aligned.rs
index d9dcd7f6c3e..47ef779f1b2 100644
--- a/tests/codegen/naked-fn/aligned.rs
+++ b/tests/codegen/naked-fn/aligned.rs
@@ -3,15 +3,15 @@
 //@ ignore-arm no "ret" mnemonic
 
 #![crate_type = "lib"]
-#![feature(naked_functions, fn_align)]
+#![feature(fn_align)]
 use std::arch::naked_asm;
 
 // CHECK: .balign 16
 // CHECK-LABEL: naked_empty:
 #[repr(align(16))]
 #[no_mangle]
-#[naked]
-pub unsafe extern "C" fn naked_empty() {
+#[unsafe(naked)]
+pub extern "C" fn naked_empty() {
     // CHECK: ret
-    naked_asm!("ret");
+    naked_asm!("ret")
 }
diff --git a/tests/codegen/naked-fn/generics.rs b/tests/codegen/naked-fn/generics.rs
index 64998df64dd..865be00d91e 100644
--- a/tests/codegen/naked-fn/generics.rs
+++ b/tests/codegen/naked-fn/generics.rs
@@ -2,7 +2,6 @@
 //@ only-x86_64
 
 #![crate_type = "lib"]
-#![feature(naked_functions, asm_const)]
 
 use std::arch::naked_asm;
 
@@ -28,21 +27,19 @@ fn test(x: u64) {
 // CHECK: add rax, 1
 // CHECK: add rax, 42
 
-#[naked]
+#[unsafe(naked)]
 pub extern "C" fn using_const_generics<const N: u64>(x: u64) -> u64 {
     const M: u64 = 42;
 
-    unsafe {
-        naked_asm!(
-            "xor rax, rax",
-            "add rax, rdi",
-            "add rax, {}",
-            "add rax, {}",
-            "ret",
-            const N,
-            const M,
-        )
-    }
+    naked_asm!(
+        "xor rax, rax",
+        "add rax, rdi",
+        "add rax, {}",
+        "add rax, {}",
+        "ret",
+        const N,
+        const M,
+    )
 }
 
 trait Invert {
@@ -60,16 +57,14 @@ impl Invert for i64 {
 // CHECK: call
 // CHECK: ret
 
-#[naked]
+#[unsafe(naked)]
 #[no_mangle]
 pub extern "C" fn generic_function<T: Invert>(x: i64) -> i64 {
-    unsafe {
-        naked_asm!(
-            "call {}",
-            "ret",
-            sym <T as Invert>::invert,
-        )
-    }
+    naked_asm!(
+        "call {}",
+        "ret",
+        sym <T as Invert>::invert,
+    )
 }
 
 #[derive(Copy, Clone)]
@@ -81,10 +76,10 @@ struct Foo(u64);
 // CHECK: mov rax, rdi
 
 impl Foo {
-    #[naked]
+    #[unsafe(naked)]
     #[no_mangle]
     extern "C" fn method(self) -> u64 {
-        unsafe { naked_asm!("mov rax, rdi", "ret") }
+        naked_asm!("mov rax, rdi", "ret")
     }
 }
 
@@ -97,10 +92,10 @@ trait Bar {
 }
 
 impl Bar for Foo {
-    #[naked]
+    #[unsafe(naked)]
     #[no_mangle]
     extern "C" fn trait_method(self) -> u64 {
-        unsafe { naked_asm!("mov rax, rdi", "ret") }
+        naked_asm!("mov rax, rdi", "ret")
     }
 }
 
@@ -109,7 +104,7 @@ impl Bar for Foo {
 // CHECK: lea rax, [rdi + rsi]
 
 // this previously ICE'd, see https://github.com/rust-lang/rust/issues/124375
-#[naked]
+#[unsafe(naked)]
 #[no_mangle]
 pub unsafe extern "C" fn naked_with_args_and_return(a: isize, b: isize) -> isize {
     naked_asm!("lea rax, [rdi + rsi]", "ret");
diff --git a/tests/codegen/naked-fn/instruction-set.rs b/tests/codegen/naked-fn/instruction-set.rs
index a7b4c22a59b..67560c5aba7 100644
--- a/tests/codegen/naked-fn/instruction-set.rs
+++ b/tests/codegen/naked-fn/instruction-set.rs
@@ -6,7 +6,7 @@
 //@ [thumb-mode] needs-llvm-components: arm
 
 #![crate_type = "lib"]
-#![feature(no_core, lang_items, rustc_attrs, naked_functions)]
+#![feature(no_core, lang_items, rustc_attrs)]
 #![no_core]
 
 extern crate minicore;
@@ -20,8 +20,8 @@ use minicore::*;
 // arm-mode: .arm
 // thumb-mode: .thumb
 #[no_mangle]
-#[naked]
-unsafe extern "C" fn test_unspecified() {
+#[unsafe(naked)]
+extern "C" fn test_unspecified() {
     naked_asm!("bx lr");
 }
 
@@ -33,9 +33,9 @@ unsafe extern "C" fn test_unspecified() {
 // arm-mode: .arm
 // thumb-mode: .thumb
 #[no_mangle]
-#[naked]
+#[unsafe(naked)]
 #[instruction_set(arm::t32)]
-unsafe extern "C" fn test_thumb() {
+extern "C" fn test_thumb() {
     naked_asm!("bx lr");
 }
 
@@ -46,8 +46,8 @@ unsafe extern "C" fn test_thumb() {
 // arm-mode: .arm
 // thumb-mode: .thumb
 #[no_mangle]
-#[naked]
+#[unsafe(naked)]
 #[instruction_set(arm::a32)]
-unsafe extern "C" fn test_arm() {
+extern "C" fn test_arm() {
     naked_asm!("bx lr");
 }
diff --git a/tests/codegen/naked-fn/min-function-alignment.rs b/tests/codegen/naked-fn/min-function-alignment.rs
index 1330d796d39..1d778be8c90 100644
--- a/tests/codegen/naked-fn/min-function-alignment.rs
+++ b/tests/codegen/naked-fn/min-function-alignment.rs
@@ -2,31 +2,31 @@
 //@ needs-asm-support
 //@ ignore-arm no "ret" mnemonic
 
-#![feature(naked_functions, fn_align)]
+#![feature(fn_align)]
 #![crate_type = "lib"]
 
 // functions without explicit alignment use the global minimum
 //
 // CHECK: .balign 16
 #[no_mangle]
-#[naked]
-pub unsafe extern "C" fn naked_no_explicit_align() {
+#[unsafe(naked)]
+pub extern "C" fn naked_no_explicit_align() {
     core::arch::naked_asm!("ret")
 }
 
 // CHECK: .balign 16
 #[no_mangle]
 #[repr(align(8))]
-#[naked]
-pub unsafe extern "C" fn naked_lower_align() {
+#[unsafe(naked)]
+pub extern "C" fn naked_lower_align() {
     core::arch::naked_asm!("ret")
 }
 
 // CHECK: .balign 32
 #[no_mangle]
 #[repr(align(32))]
-#[naked]
-pub unsafe extern "C" fn naked_higher_align() {
+#[unsafe(naked)]
+pub extern "C" fn naked_higher_align() {
     core::arch::naked_asm!("ret")
 }
 
@@ -38,7 +38,7 @@ pub unsafe extern "C" fn naked_higher_align() {
 // CHECK: .balign 16
 #[no_mangle]
 #[cold]
-#[naked]
-pub unsafe extern "C" fn no_explicit_align_cold() {
+#[unsafe(naked)]
+pub extern "C" fn no_explicit_align_cold() {
     core::arch::naked_asm!("ret")
 }
diff --git a/tests/codegen/naked-fn/naked-functions.rs b/tests/codegen/naked-fn/naked-functions.rs
index 3fe795178b7..344af6eb42f 100644
--- a/tests/codegen/naked-fn/naked-functions.rs
+++ b/tests/codegen/naked-fn/naked-functions.rs
@@ -13,7 +13,7 @@
 //@[thumb] needs-llvm-components: arm
 
 #![crate_type = "lib"]
-#![feature(no_core, lang_items, rustc_attrs, naked_functions)]
+#![feature(no_core, lang_items, rustc_attrs)]
 #![no_core]
 
 extern crate minicore;
@@ -60,8 +60,8 @@ use minicore::*;
 // linux,win: .att_syntax
 
 #[no_mangle]
-#[naked]
-pub unsafe extern "C" fn naked_empty() {
+#[unsafe(naked)]
+pub extern "C" fn naked_empty() {
     #[cfg(not(all(target_arch = "arm", target_feature = "thumb-mode")))]
     naked_asm!("ret");
 
@@ -114,8 +114,8 @@ pub unsafe extern "C" fn naked_empty() {
 // linux,win: .att_syntax
 
 #[no_mangle]
-#[naked]
-pub unsafe extern "C" fn naked_with_args_and_return(a: isize, b: isize) -> isize {
+#[unsafe(naked)]
+pub extern "C" fn naked_with_args_and_return(a: isize, b: isize) -> isize {
     #[cfg(any(target_os = "windows", target_os = "linux"))]
     {
         naked_asm!("lea rax, [rdi + rsi]", "ret")
@@ -138,9 +138,9 @@ pub unsafe extern "C" fn naked_with_args_and_return(a: isize, b: isize) -> isize
 // thumb:            .pushsection .text.some_different_name,\22ax\22, %progbits
 // CHECK-LABEL: test_link_section:
 #[no_mangle]
-#[naked]
+#[unsafe(naked)]
 #[link_section = ".text.some_different_name"]
-pub unsafe extern "C" fn test_link_section() {
+pub extern "C" fn test_link_section() {
     #[cfg(not(all(target_arch = "arm", target_feature = "thumb-mode")))]
     naked_asm!("ret");
 
@@ -159,7 +159,7 @@ pub unsafe extern "C" fn test_link_section() {
 // win_i686-LABEL: @fastcall_cc@4:
 #[cfg(target_os = "windows")]
 #[no_mangle]
-#[naked]
-pub unsafe extern "fastcall" fn fastcall_cc(x: i32) -> i32 {
+#[unsafe(naked)]
+pub extern "fastcall" fn fastcall_cc(x: i32) -> i32 {
     naked_asm!("ret");
 }
diff --git a/tests/codegen/regparm-inreg.rs b/tests/codegen/regparm-inreg.rs
index 8dae3a83e4e..15702804dfd 100644
--- a/tests/codegen/regparm-inreg.rs
+++ b/tests/codegen/regparm-inreg.rs
@@ -3,7 +3,7 @@
 // x86 only.
 
 //@ add-core-stubs
-//@ compile-flags: --target i686-unknown-linux-gnu -Cno-prepopulate-passes -Copt-level=3
+//@ compile-flags: --target i686-unknown-linux-gnu -Cno-prepopulate-passes -Copt-level=3 -Ctarget-feature=+avx
 //@ needs-llvm-components: x86
 
 //@ revisions:regparm0 regparm1 regparm2 regparm3
diff --git a/tests/codegen/remap_path_prefix/aux_mod.rs b/tests/codegen/remap_path_prefix/aux_mod.rs
index c37e91c705c..3217e9e51e7 100644
--- a/tests/codegen/remap_path_prefix/aux_mod.rs
+++ b/tests/codegen/remap_path_prefix/aux_mod.rs
@@ -1,4 +1,4 @@
-//@ ignore-test: this is not a test
+//@ ignore-auxiliary (used by `./main.rs`)
 
 #[inline]
 pub fn some_aux_mod_function() -> i32 {
diff --git a/tests/codegen/repr/transparent.rs b/tests/codegen/repr/transparent.rs
index e7e4c40a099..5475bfb6b65 100644
--- a/tests/codegen/repr/transparent.rs
+++ b/tests/codegen/repr/transparent.rs
@@ -9,7 +9,7 @@
 // For LoongArch: see codegen/loongarch-abi
 
 #![crate_type = "lib"]
-#![feature(repr_simd, transparent_unions)]
+#![feature(repr_simd, transparent_unions, arm_target_feature)]
 
 use std::marker::PhantomData;
 
@@ -139,6 +139,9 @@ pub struct Vector(f32x4);
 
 // CHECK: define{{.*}}<4 x float> @test_Vector(<4 x float> %_1)
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 pub extern "C" fn test_Vector(_: Vector) -> Vector {
     loop {}
 }
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-gather.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-gather.rs
index 9bb46a3546b..c06b36d68b9 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-gather.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-gather.rs
@@ -29,6 +29,19 @@ pub unsafe fn gather_f32x2(
     simd_gather(values, pointers, mask)
 }
 
+// CHECK-LABEL: @gather_f32x2_unsigned
+#[no_mangle]
+pub unsafe fn gather_f32x2_unsigned(
+    pointers: Vec2<*const f32>,
+    mask: Vec2<u32>,
+    values: Vec2<f32>,
+) -> Vec2<f32> {
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]], <2 x float> {{.*}})
+    simd_gather(values, pointers, mask)
+}
+
 // CHECK-LABEL: @gather_pf32x2
 #[no_mangle]
 pub unsafe fn gather_pf32x2(
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-load.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-load.rs
index fcc4cb5d630..21578e67cff 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-load.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-load.rs
@@ -23,6 +23,19 @@ pub unsafe fn load_f32x2(mask: Vec2<i32>, pointer: *const f32, values: Vec2<f32>
     simd_masked_load(mask, pointer, values)
 }
 
+// CHECK-LABEL: @load_f32x2_unsigned
+#[no_mangle]
+pub unsafe fn load_f32x2_unsigned(
+    mask: Vec2<u32>,
+    pointer: *const f32,
+    values: Vec2<f32>,
+) -> Vec2<f32> {
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call <2 x float> @llvm.masked.load.v2f32.p0(ptr {{.*}}, i32 4, <2 x i1> [[B]], <2 x float> {{.*}})
+    simd_masked_load(mask, pointer, values)
+}
+
 // CHECK-LABEL: @load_pf32x4
 #[no_mangle]
 pub unsafe fn load_pf32x4(
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-store.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-store.rs
index 04f4a0c6382..22a8f7e54bd 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-store.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-store.rs
@@ -23,6 +23,15 @@ pub unsafe fn store_f32x2(mask: Vec2<i32>, pointer: *mut f32, values: Vec2<f32>)
     simd_masked_store(mask, pointer, values)
 }
 
+// CHECK-LABEL: @store_f32x2_unsigned
+#[no_mangle]
+pub unsafe fn store_f32x2_unsigned(mask: Vec2<u32>, pointer: *mut f32, values: Vec2<f32>) {
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call void @llvm.masked.store.v2f32.p0(<2 x float> {{.*}}, ptr {{.*}}, i32 4, <2 x i1> [[B]])
+    simd_masked_store(mask, pointer, values)
+}
+
 // CHECK-LABEL: @store_pf32x4
 #[no_mangle]
 pub unsafe fn store_pf32x4(mask: Vec4<i32>, pointer: *mut *const f32, values: Vec4<*const f32>) {
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs
index 9506f8f6d3a..0cc9e6ae59a 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs
@@ -25,6 +25,15 @@ pub unsafe fn scatter_f32x2(pointers: Vec2<*mut f32>, mask: Vec2<i32>, values: V
     simd_scatter(values, pointers, mask)
 }
 
+// CHECK-LABEL: @scatter_f32x2_unsigned
+#[no_mangle]
+pub unsafe fn scatter_f32x2_unsigned(pointers: Vec2<*mut f32>, mask: Vec2<u32>, values: Vec2<f32>) {
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, {{<i32 31, i32 31>|splat \(i32 31\)}}
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]
+    simd_scatter(values, pointers, mask)
+}
+
 // CHECK-LABEL: @scatter_pf32x2
 #[no_mangle]
 pub unsafe fn scatter_pf32x2(
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-select.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-select.rs
index 71279d9f0ea..f6531c1b23a 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-select.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-select.rs
@@ -22,6 +22,10 @@ pub struct b8x4(pub [i8; 4]);
 #[derive(Copy, Clone, PartialEq, Debug)]
 pub struct i32x4([i32; 4]);
 
+#[repr(simd)]
+#[derive(Copy, Clone, PartialEq, Debug)]
+pub struct u32x4([u32; 4]);
+
 // CHECK-LABEL: @select_m8
 #[no_mangle]
 pub unsafe fn select_m8(m: b8x4, a: f32x4, b: f32x4) -> f32x4 {
@@ -40,6 +44,15 @@ pub unsafe fn select_m32(m: i32x4, a: f32x4, b: f32x4) -> f32x4 {
     simd_select(m, a, b)
 }
 
+// CHECK-LABEL: @select_m32_unsigned
+#[no_mangle]
+pub unsafe fn select_m32_unsigned(m: u32x4, a: f32x4, b: f32x4) -> f32x4 {
+    // CHECK: [[A:%[0-9]+]] = lshr <4 x i32> %{{.*}}, {{<i32 31, i32 31, i32 31, i32 31>|splat \(i32 31\)}}
+    // CHECK: [[B:%[0-9]+]] = trunc <4 x i32> [[A]] to <4 x i1>
+    // CHECK: select <4 x i1> [[B]]
+    simd_select(m, a, b)
+}
+
 // CHECK-LABEL: @select_bitmask
 #[no_mangle]
 pub unsafe fn select_bitmask(m: i8, a: f32x8, b: f32x8) -> f32x8 {
diff --git a/tests/codegen/simd/extract-insert-dyn.rs b/tests/codegen/simd/extract-insert-dyn.rs
index 584e2c7887a..2c64f5d3c09 100644
--- a/tests/codegen/simd/extract-insert-dyn.rs
+++ b/tests/codegen/simd/extract-insert-dyn.rs
@@ -1,6 +1,6 @@
 //@compile-flags: -C opt-level=3 -C no-prepopulate-passes
 
-#![feature(core_intrinsics, repr_simd)]
+#![feature(core_intrinsics, repr_simd, arm_target_feature)]
 #![no_std]
 #![crate_type = "lib"]
 #![allow(non_camel_case_types)]
@@ -21,6 +21,9 @@ pub struct i8x16([i8; 16]);
 // CHECK-LABEL: dyn_simd_extract
 // CHECK: extractelement <16 x i8> %x, i32 %idx
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 unsafe extern "C" fn dyn_simd_extract(x: i8x16, idx: u32) -> i8 {
     simd_extract_dyn(x, idx)
 }
@@ -28,6 +31,9 @@ unsafe extern "C" fn dyn_simd_extract(x: i8x16, idx: u32) -> i8 {
 // CHECK-LABEL: literal_dyn_simd_extract
 // CHECK: extractelement <16 x i8> %x, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 unsafe extern "C" fn literal_dyn_simd_extract(x: i8x16) -> i8 {
     simd_extract_dyn(x, 7)
 }
@@ -35,6 +41,9 @@ unsafe extern "C" fn literal_dyn_simd_extract(x: i8x16) -> i8 {
 // CHECK-LABEL: const_dyn_simd_extract
 // CHECK: extractelement <16 x i8> %x, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 unsafe extern "C" fn const_dyn_simd_extract(x: i8x16) -> i8 {
     simd_extract_dyn(x, const { 3 + 4 })
 }
@@ -42,6 +51,9 @@ unsafe extern "C" fn const_dyn_simd_extract(x: i8x16) -> i8 {
 // CHECK-LABEL: const_simd_extract
 // CHECK: extractelement <16 x i8> %x, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 unsafe extern "C" fn const_simd_extract(x: i8x16) -> i8 {
     simd_extract(x, const { 3 + 4 })
 }
@@ -49,6 +61,9 @@ unsafe extern "C" fn const_simd_extract(x: i8x16) -> i8 {
 // CHECK-LABEL: dyn_simd_insert
 // CHECK: insertelement <16 x i8> %x, i8 %e, i32 %idx
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 unsafe extern "C" fn dyn_simd_insert(x: i8x16, e: i8, idx: u32) -> i8x16 {
     simd_insert_dyn(x, idx, e)
 }
@@ -56,6 +71,9 @@ unsafe extern "C" fn dyn_simd_insert(x: i8x16, e: i8, idx: u32) -> i8x16 {
 // CHECK-LABEL: literal_dyn_simd_insert
 // CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 unsafe extern "C" fn literal_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
     simd_insert_dyn(x, 7, e)
 }
@@ -63,6 +81,9 @@ unsafe extern "C" fn literal_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
 // CHECK-LABEL: const_dyn_simd_insert
 // CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 unsafe extern "C" fn const_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
     simd_insert_dyn(x, const { 3 + 4 }, e)
 }
@@ -70,6 +91,9 @@ unsafe extern "C" fn const_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
 // CHECK-LABEL: const_simd_insert
 // CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
 unsafe extern "C" fn const_simd_insert(x: i8x16, e: i8) -> i8x16 {
     simd_insert(x, const { 3 + 4 }, e)
 }