10 files changed, 138 insertions, 28 deletions
diff --git a/tests/codegen/align-struct.rs b/tests/codegen/align-struct.rs
index cc65b08a922..402a184d4c0 100644
--- a/tests/codegen/align-struct.rs
+++ b/tests/codegen/align-struct.rs
@@ -1,5 +1,7 @@
 //@ compile-flags: -C no-prepopulate-passes -Z mir-opt-level=0
-//
+// 32bit MSVC does not align things properly so we suppress high alignment annotations (#112480)
+//@ ignore-i686-pc-windows-msvc
+//@ ignore-i686-pc-windows-gnu
 
 #![crate_type = "lib"]
 
diff --git a/tests/codegen/autodiff/identical_fnc.rs b/tests/codegen/autodiff/identical_fnc.rs
new file mode 100644
index 00000000000..1c3277f52b4
--- /dev/null
+++ b/tests/codegen/autodiff/identical_fnc.rs
@@ -0,0 +1,45 @@
+//@ compile-flags: -Zautodiff=Enable -C opt-level=3  -Clto=fat
+//@ no-prefer-dynamic
+//@ needs-enzyme
+//
+// Each autodiff invocation creates a new placeholder function, which we will replace on llvm-ir
+// level. If a user tries to differentiate two identical functions within the same compilation unit,
+// then LLVM might merge them in release mode before AD. In that case we can't rewrite one of the
+// merged placeholder function anymore, and compilation would fail. We prevent this by disabling
+// LLVM's merge_function pass before AD. Here we implicetely test that our solution keeps working.
+// We also explicetly test that we keep running merge_function after AD, by checking for two
+// identical function calls in the LLVM-IR, while having two different calls in the Rust code.
+#![feature(autodiff)]
+
+use std::autodiff::autodiff;
+
+#[autodiff(d_square, Reverse, Duplicated, Active)]
+fn square(x: &f64) -> f64 {
+    x * x
+}
+
+#[autodiff(d_square2, Reverse, Duplicated, Active)]
+fn square2(x: &f64) -> f64 {
+    x * x
+}
+
+// CHECK:; identical_fnc::main
+// CHECK-NEXT:; Function Attrs:
+// CHECK-NEXT:define internal void @_ZN13identical_fnc4main17hf4dbc69c8d2f9130E()
+// CHECK-NEXT:start:
+// CHECK-NOT:br
+// CHECK-NOT:ret
+// CHECK:; call identical_fnc::d_square
+// CHECK-NEXT:  call fastcc void @_ZN13identical_fnc8d_square17h4c364207a2f8e06dE(double %x.val, ptr noalias noundef nonnull align 8 dereferenceable(8) %dx1)
+// CHECK-NEXT:; call identical_fnc::d_square
+// CHECK-NEXT:  call fastcc void @_ZN13identical_fnc8d_square17h4c364207a2f8e06dE(double %x.val, ptr noalias noundef nonnull align 8 dereferenceable(8) %dx2)
+
+fn main() {
+    let x = std::hint::black_box(3.0);
+    let mut dx1 = std::hint::black_box(1.0);
+    let mut dx2 = std::hint::black_box(1.0);
+    let _ = d_square(&x, &mut dx1, 1.0);
+    let _ = d_square2(&x, &mut dx2, 1.0);
+    assert_eq!(dx1, 6.0);
+    assert_eq!(dx2, 6.0);
+}
diff --git a/tests/codegen/autodiff/inline.rs b/tests/codegen/autodiff/inline.rs
new file mode 100644
index 00000000000..e90faa4aa38
--- /dev/null
+++ b/tests/codegen/autodiff/inline.rs
@@ -0,0 +1,23 @@
+//@ compile-flags: -Zautodiff=Enable -C opt-level=3  -Clto=fat -Zautodiff=NoPostopt
+//@ no-prefer-dynamic
+//@ needs-enzyme
+
+#![feature(autodiff)]
+
+use std::autodiff::autodiff;
+
+#[autodiff(d_square, Reverse, Duplicated, Active)]
+fn square(x: &f64) -> f64 {
+    x * x
+}
+
+// CHECK: ; inline::d_square
+// CHECK-NEXT: ; Function Attrs: alwaysinline
+// CHECK-NOT: noinline
+// CHECK-NEXT: define internal fastcc void @_ZN6inline8d_square17h021c74e92c259cdeE
+fn main() {
+    let x = std::hint::black_box(3.0);
+    let mut dx1 = std::hint::black_box(1.0);
+    let _ = d_square(&x, &mut dx1, 1.0);
+    assert_eq!(dx1, 6.0);
+}
diff --git a/tests/codegen/const-vector.rs b/tests/codegen/const-vector.rs
index 8343594e5d2..42921442e03 100644
--- a/tests/codegen/const-vector.rs
+++ b/tests/codegen/const-vector.rs
@@ -1,4 +1,8 @@
-//@ compile-flags: -C no-prepopulate-passes -Copt-level=0
+//@ revisions: OPT0 OPT0_S390X
+//@ [OPT0] ignore-s390x
+//@ [OPT0_S390X] only-s390x
+//@ [OPT0] compile-flags: -C no-prepopulate-passes -Copt-level=0
+//@ [OPT0_S390X] compile-flags: -C no-prepopulate-passes -Copt-level=0 -C target-cpu=z13
 
 // This test checks that constants of SIMD type are passed as immediate vectors.
 // We ensure that both vector representations (struct with fields and struct wrapping array) work.
@@ -8,6 +12,8 @@
 #![feature(repr_simd)]
 #![feature(rustc_attrs)]
 #![feature(simd_ffi)]
+#![feature(arm_target_feature)]
+#![feature(mips_target_feature)]
 #![allow(non_camel_case_types)]
 
 // Setting up structs that can be used as const vectors
@@ -28,33 +34,12 @@ pub struct Simd<T, const N: usize>([T; N]);
 
 extern "unadjusted" {
     fn test_i8x2(a: i8x2);
-}
-
-extern "unadjusted" {
     fn test_i8x2_two_args(a: i8x2, b: i8x2);
-}
-
-extern "unadjusted" {
     fn test_i8x2_mixed_args(a: i8x2, c: i32, b: i8x2);
-}
-
-extern "unadjusted" {
     fn test_i8x2_arr(a: i8x2);
-}
-
-extern "unadjusted" {
     fn test_f32x2(a: f32x2);
-}
-
-extern "unadjusted" {
     fn test_f32x2_arr(a: f32x2);
-}
-
-extern "unadjusted" {
     fn test_simd(a: Simd<i32, 4>);
-}
-
-extern "unadjusted" {
     fn test_simd_unaligned(a: Simd<i32, 3>);
 }
 
@@ -62,6 +47,10 @@ extern "unadjusted" {
 // if the size is not a power of 2
 // CHECK: %"Simd<i32, 3>" = type { [3 x i32] }
 
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
 pub fn do_call() {
     unsafe {
         // CHECK: call void @test_i8x2(<2 x i8> <i8 32, i8 64>
diff --git a/tests/codegen/intrinsics/select_unpredictable.rs b/tests/codegen/intrinsics/select_unpredictable.rs
index 2db4ae174b3..ad7120c6fb8 100644
--- a/tests/codegen/intrinsics/select_unpredictable.rs
+++ b/tests/codegen/intrinsics/select_unpredictable.rs
@@ -1,7 +1,6 @@
 //@ compile-flags: -Copt-level=3 -Zmerge-functions=disabled
 
 #![feature(core_intrinsics)]
-#![feature(select_unpredictable)]
 #![crate_type = "lib"]
 
 /* Test the intrinsic */
diff --git a/tests/codegen/issues/issue-56927.rs b/tests/codegen/issues/issue-56927.rs
index a40718689b3..415ef073e03 100644
--- a/tests/codegen/issues/issue-56927.rs
+++ b/tests/codegen/issues/issue-56927.rs
@@ -1,4 +1,7 @@
 //@ compile-flags: -C no-prepopulate-passes
+// 32bit MSVC does not align things properly so we suppress high alignment annotations (#112480)
+//@ ignore-i686-pc-windows-msvc
+//@ ignore-i686-pc-windows-gnu
 
 #![crate_type = "rlib"]
 
diff --git a/tests/codegen/regparm-inreg.rs b/tests/codegen/regparm-inreg.rs
index 8dae3a83e4e..15702804dfd 100644
--- a/tests/codegen/regparm-inreg.rs
+++ b/tests/codegen/regparm-inreg.rs
@@ -3,7 +3,7 @@
 // x86 only.
 
 //@ add-core-stubs
-//@ compile-flags: --target i686-unknown-linux-gnu -Cno-prepopulate-passes -Copt-level=3
+//@ compile-flags: --target i686-unknown-linux-gnu -Cno-prepopulate-passes -Copt-level=3 -Ctarget-feature=+avx
 //@ needs-llvm-components: x86
 
 //@ revisions:regparm0 regparm1 regparm2 regparm3
diff --git a/tests/codegen/repr/transparent.rs b/tests/codegen/repr/transparent.rs
index e7e4c40a099..29b627462a4 100644
--- a/tests/codegen/repr/transparent.rs
+++ b/tests/codegen/repr/transparent.rs
@@ -9,7 +9,7 @@
 // For LoongArch: see codegen/loongarch-abi
 
 #![crate_type = "lib"]
-#![feature(repr_simd, transparent_unions)]
+#![feature(repr_simd, transparent_unions, arm_target_feature, mips_target_feature)]
 
 use std::marker::PhantomData;
 
@@ -139,6 +139,10 @@ pub struct Vector(f32x4);
 
 // CHECK: define{{.*}}<4 x float> @test_Vector(<4 x float> %_1)
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
 pub extern "C" fn test_Vector(_: Vector) -> Vector {
     loop {}
 }
diff --git a/tests/codegen/simd/extract-insert-dyn.rs b/tests/codegen/simd/extract-insert-dyn.rs
index 584e2c7887a..729f0145314 100644
--- a/tests/codegen/simd/extract-insert-dyn.rs
+++ b/tests/codegen/simd/extract-insert-dyn.rs
@@ -1,6 +1,12 @@
 //@compile-flags: -C opt-level=3 -C no-prepopulate-passes
 
-#![feature(core_intrinsics, repr_simd)]
+#![feature(
+    core_intrinsics,
+    repr_simd,
+    arm_target_feature,
+    mips_target_feature,
+    s390x_target_feature
+)]
 #![no_std]
 #![crate_type = "lib"]
 #![allow(non_camel_case_types)]
@@ -21,6 +27,11 @@ pub struct i8x16([i8; 16]);
 // CHECK-LABEL: dyn_simd_extract
 // CHECK: extractelement <16 x i8> %x, i32 %idx
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
 unsafe extern "C" fn dyn_simd_extract(x: i8x16, idx: u32) -> i8 {
     simd_extract_dyn(x, idx)
 }
@@ -28,6 +39,11 @@ unsafe extern "C" fn dyn_simd_extract(x: i8x16, idx: u32) -> i8 {
 // CHECK-LABEL: literal_dyn_simd_extract
 // CHECK: extractelement <16 x i8> %x, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
 unsafe extern "C" fn literal_dyn_simd_extract(x: i8x16) -> i8 {
     simd_extract_dyn(x, 7)
 }
@@ -35,6 +51,11 @@ unsafe extern "C" fn literal_dyn_simd_extract(x: i8x16) -> i8 {
 // CHECK-LABEL: const_dyn_simd_extract
 // CHECK: extractelement <16 x i8> %x, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
 unsafe extern "C" fn const_dyn_simd_extract(x: i8x16) -> i8 {
     simd_extract_dyn(x, const { 3 + 4 })
 }
@@ -42,6 +63,11 @@ unsafe extern "C" fn const_dyn_simd_extract(x: i8x16) -> i8 {
 // CHECK-LABEL: const_simd_extract
 // CHECK: extractelement <16 x i8> %x, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
 unsafe extern "C" fn const_simd_extract(x: i8x16) -> i8 {
     simd_extract(x, const { 3 + 4 })
 }
@@ -49,6 +75,11 @@ unsafe extern "C" fn const_simd_extract(x: i8x16) -> i8 {
 // CHECK-LABEL: dyn_simd_insert
 // CHECK: insertelement <16 x i8> %x, i8 %e, i32 %idx
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
 unsafe extern "C" fn dyn_simd_insert(x: i8x16, e: i8, idx: u32) -> i8x16 {
     simd_insert_dyn(x, idx, e)
 }
@@ -56,6 +87,11 @@ unsafe extern "C" fn dyn_simd_insert(x: i8x16, e: i8, idx: u32) -> i8x16 {
 // CHECK-LABEL: literal_dyn_simd_insert
 // CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
 unsafe extern "C" fn literal_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
     simd_insert_dyn(x, 7, e)
 }
@@ -63,6 +99,11 @@ unsafe extern "C" fn literal_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
 // CHECK-LABEL: const_dyn_simd_insert
 // CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
 unsafe extern "C" fn const_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
     simd_insert_dyn(x, const { 3 + 4 }, e)
 }
@@ -70,6 +111,11 @@ unsafe extern "C" fn const_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
 // CHECK-LABEL: const_simd_insert
 // CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
 #[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
 unsafe extern "C" fn const_simd_insert(x: i8x16, e: i8) -> i8x16 {
     simd_insert(x, const { 3 + 4 }, e)
 }
diff --git a/tests/codegen/slice-as_chunks.rs b/tests/codegen/slice-as_chunks.rs
index a90ee7c628e..337eb8981f6 100644
--- a/tests/codegen/slice-as_chunks.rs
+++ b/tests/codegen/slice-as_chunks.rs
@@ -2,7 +2,6 @@
 //@ only-64bit (because the LLVM type of i64 for usize shows up)
 
 #![crate_type = "lib"]
-#![feature(slice_as_chunks)]
 
 // CHECK-LABEL: @chunks4
 #[no_mangle]