x86-sse2 ABI: use SSE registers for floats and SIMD

author: Ralf Jung <post@ralfj.de> 2025-01-29 12:23:15 +0100
committer: Ralf Jung <post@ralfj.de> 2025-02-18 16:11:41 +0100
commit: 803feb5dc679556ba2d93fe616d1883d7ebc7775 (patch)
tree: 92c7d2c3167497ef56c18b26bd94c0afc6ec61a7 /tests/codegen
parent: de91711756133f11f7861dd099c001eaff0aafaa (diff)
download: rust-803feb5dc679556ba2d93fe616d1883d7ebc7775.tar.gz
rust-803feb5dc679556ba2d93fe616d1883d7ebc7775.zip
8 files changed, 161 insertions, 73 deletions
diff --git a/tests/codegen/abi-x86-sse.rs b/tests/codegen/abi-x86-sse.rs
new file mode 100644
index 00000000000..837bf6134b0
--- /dev/null
+++ b/tests/codegen/abi-x86-sse.rs
@@ -0,0 +1,36 @@
+//@ compile-flags: -Z merge-functions=disabled
+
+//@ revisions: x86-64
+//@[x86-64] compile-flags: --target x86_64-unknown-linux-gnu
+//@[x86-64] needs-llvm-components: x86
+
+//@ revisions: x86-32
+//@[x86-32] compile-flags: --target i686-unknown-linux-gnu
+//@[x86-32] needs-llvm-components: x86
+
+//@ revisions: x86-32-nosse
+//@[x86-32-nosse] compile-flags: --target i586-unknown-linux-gnu
+//@[x86-32-nosse] needs-llvm-components: x86
+
+#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
+#![no_core]
+#![crate_type = "lib"]
+
+#[lang = "sized"]
+trait Sized {}
+
+#[lang = "copy"]
+trait Copy {}
+
+// Ensure this type is passed without ptr indirection on targets that
+// require SSE2.
+#[repr(simd)]
+pub struct Sse([f32; 4]);
+
+// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}})
+// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}})
+// x86-32-nosse: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
+#[no_mangle]
+pub fn sse_id(x: Sse) -> Sse {
+    x
+}
diff --git a/tests/codegen/float/f128.rs b/tests/codegen/float/f128.rs
index 562a8e6c9e9..d87bab1172a 100644
--- a/tests/codegen/float/f128.rs
+++ b/tests/codegen/float/f128.rs
@@ -1,8 +1,11 @@
 // 32-bit x86 returns float types differently to avoid the x87 stack.
 // 32-bit systems will return 128bit values using a return area pointer.
 // Emscripten aligns f128 to 8 bytes, not 16.
-//@ revisions: x86 bit32 bit64 emscripten
-//@[x86] only-x86
+//@ revisions: x86-sse x86-nosse bit32 bit64 emscripten
+//@[x86-sse] only-x86
+//@[x86-sse] only-rustc_abi-x86-sse2
+//@[x86-nosse] only-x86
+//@[x86-nosse] ignore-rustc_abi-x86-sse2
 //@[bit32] ignore-x86
 //@[bit32] ignore-emscripten
 //@[bit32] only-32bit
@@ -60,7 +63,8 @@ pub fn f128_le(a: f128, b: f128) -> bool {
     a <= b
 }
 
-// x86-LABEL: void @f128_neg({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f128_neg({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f128_neg(fp128
 // bit32-LABEL: void @f128_neg({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f128_neg(
 // emscripten-LABEL: void @f128_neg({{.*}}sret([16 x i8])
@@ -70,7 +74,8 @@ pub fn f128_neg(a: f128) -> f128 {
     -a
 }
 
-// x86-LABEL: void @f128_add({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f128_add({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f128_add(fp128
 // bit32-LABEL: void @f128_add({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f128_add(
 // emscripten-LABEL: void @f128_add({{.*}}sret([16 x i8])
@@ -80,7 +85,8 @@ pub fn f128_add(a: f128, b: f128) -> f128 {
     a + b
 }
 
-// x86-LABEL: void @f128_sub({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f128_sub({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f128_sub(fp128
 // bit32-LABEL: void @f128_sub({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f128_sub(
 // emscripten-LABEL: void @f128_sub({{.*}}sret([16 x i8])
@@ -90,7 +96,8 @@ pub fn f128_sub(a: f128, b: f128) -> f128 {
     a - b
 }
 
-// x86-LABEL: void @f128_mul({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f128_mul({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f128_mul(fp128
 // bit32-LABEL: void @f128_mul({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f128_mul(
 // emscripten-LABEL: void @f128_mul({{.*}}sret([16 x i8])
@@ -100,7 +107,8 @@ pub fn f128_mul(a: f128, b: f128) -> f128 {
     a * b
 }
 
-// x86-LABEL: void @f128_div({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f128_div({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f128_div(fp128
 // bit32-LABEL: void @f128_div({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f128_div(
 // emscripten-LABEL: void @f128_div({{.*}}sret([16 x i8])
@@ -110,7 +118,8 @@ pub fn f128_div(a: f128, b: f128) -> f128 {
     a / b
 }
 
-// x86-LABEL: void @f128_rem({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f128_rem({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f128_rem(fp128
 // bit32-LABEL: void @f128_rem({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f128_rem(
 // emscripten-LABEL: void @f128_rem({{.*}}sret([16 x i8])
@@ -162,7 +171,8 @@ pub fn f128_rem_assign(a: &mut f128, b: f128) {
 
 /* float to float conversions */
 
-// x86-LABEL: i16 @f128_as_f16(
+// x86-sse-LABEL: <2 x i8> @f128_as_f16(
+// x86-nosse-LABEL: i16 @f128_as_f16(
 // bits32-LABEL: half @f128_as_f16(
 // bits64-LABEL: half @f128_as_f16(
 #[no_mangle]
@@ -171,7 +181,8 @@ pub fn f128_as_f16(a: f128) -> f16 {
     a as f16
 }
 
-// x86-LABEL: i32 @f128_as_f32(
+// x86-sse-LABEL: <4 x i8> @f128_as_f32(
+// x86-nosse-LABEL: i32 @f128_as_f32(
 // bit32-LABEL: float @f128_as_f32(
 // bit64-LABEL: float @f128_as_f32(
 // emscripten-LABEL: float @f128_as_f32(
@@ -181,7 +192,8 @@ pub fn f128_as_f32(a: f128) -> f32 {
     a as f32
 }
 
-// x86-LABEL: void @f128_as_f64(
+// x86-sse-LABEL: <8 x i8> @f128_as_f64(
+// x86-nosse-LABEL: void @f128_as_f64({{.*}}sret([8 x i8])
 // bit32-LABEL: double @f128_as_f64(
 // bit64-LABEL: double @f128_as_f64(
 // emscripten-LABEL: double @f128_as_f64(
@@ -191,7 +203,8 @@ pub fn f128_as_f64(a: f128) -> f64 {
     a as f64
 }
 
-// x86-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f128_as_self(
+// x86-nosse-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f128_as_self(
 // emscripten-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
@@ -204,7 +217,8 @@ pub fn f128_as_self(a: f128) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f16_as_f128(
+// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f16_as_f128(
 // emscripten-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
@@ -214,7 +228,8 @@ pub fn f16_as_f128(a: f16) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f32_as_f128(
+// x86-nosse-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f32_as_f128(
 // emscripten-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
@@ -224,7 +239,8 @@ pub fn f32_as_f128(a: f32) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f64_as_f128(
+// x86-nosse-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f64_as_f128(
 // emscripten-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
@@ -263,7 +279,8 @@ pub fn f128_as_u64(a: f128) -> u64 {
     a as u64
 }
 
-// x86-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
 // bit64-LABEL: i128 @f128_as_u128(
 // emscripten-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
@@ -300,7 +317,8 @@ pub fn f128_as_i64(a: f128) -> i64 {
     a as i64
 }
 
-// x86-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
 // bit64-LABEL: i128 @f128_as_i128(
 // emscripten-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
@@ -312,7 +330,8 @@ pub fn f128_as_i128(a: f128) -> i128 {
 
 /* int to float conversions */
 
-// x86-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @u8_as_f128(
+// x86-nosse-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @u8_as_f128(
 // emscripten-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
@@ -322,7 +341,8 @@ pub fn u8_as_f128(a: u8) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @u16_as_f128(
+// x86-nosse-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @u16_as_f128(
 // emscripten-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
@@ -332,7 +352,8 @@ pub fn u16_as_f128(a: u16) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @u32_as_f128(
+// x86-nosse-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @u32_as_f128(
 // emscripten-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
@@ -342,7 +363,8 @@ pub fn u32_as_f128(a: u32) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @u64_as_f128(
+// x86-nosse-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @u64_as_f128(
 // emscripten-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
@@ -352,7 +374,8 @@ pub fn u64_as_f128(a: u64) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @u128_as_f128(
+// x86-nosse-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @u128_as_f128(
 // emscripten-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
@@ -362,7 +385,8 @@ pub fn u128_as_f128(a: u128) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @i8_as_f128(
+// x86-nosse-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @i8_as_f128(
 // emscripten-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
@@ -372,7 +396,8 @@ pub fn i8_as_f128(a: i8) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @i16_as_f128(
+// x86-nosse-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @i16_as_f128(
 // emscripten-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
@@ -382,7 +407,8 @@ pub fn i16_as_f128(a: i16) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @i32_as_f128(
+// x86-nosse-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @i32_as_f128(
 // emscripten-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
@@ -392,7 +418,8 @@ pub fn i32_as_f128(a: i32) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @i64_as_f128(
+// x86-nosse-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @i64_as_f128(
 // emscripten-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
@@ -402,7 +429,8 @@ pub fn i64_as_f128(a: i64) -> f128 {
     a as f128
 }
 
-// x86-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @i128_as_f128(
+// x86-nosse-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @i128_as_f128(
 // emscripten-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
diff --git a/tests/codegen/float/f16.rs b/tests/codegen/float/f16.rs
index 5c3a5893b9d..0c40606ad8a 100644
--- a/tests/codegen/float/f16.rs
+++ b/tests/codegen/float/f16.rs
@@ -1,7 +1,10 @@
 // 32-bit x86 returns float types differently to avoid the x87 stack.
 // 32-bit systems will return 128bit values using a return area pointer.
-//@ revisions: x86 bit32 bit64
-//@[x86] only-x86
+//@ revisions: x86-sse x86-nosse bit32 bit64
+//@[x86-sse] only-x86
+//@[x86-sse] only-rustc_abi-x86-sse2
+//@[x86-nosse] only-x86
+//@[x86-nosse] ignore-rustc_abi-x86-sse2
 //@[bit32] ignore-x86
 //@[bit32] only-32bit
 //@[bit64] ignore-x86
@@ -59,8 +62,10 @@ pub fn f16_le(a: f16, b: f16) -> bool {
 }
 
 // This is where we check the argument and return ABI for f16.
-// other-LABEL: half @f16_neg(half
-// x86-LABEL: i16 @f16_neg(half
+// bit32-LABEL: half @f16_neg(half
+// bit64-LABEL: half @f16_neg(half
+// x86-sse-LABEL: <2 x i8> @f16_neg(half
+// x86-nosse-LABEL: i16 @f16_neg(half
 #[no_mangle]
 pub fn f16_neg(a: f16) -> f16 {
     // CHECK: fneg half %{{.+}}
@@ -144,17 +149,23 @@ pub fn f16_rem_assign(a: &mut f16, b: f16) {
 
 /* float to float conversions */
 
-// other-LABEL: half @f16_as_self(
-// x86-LABEL: i16 @f16_as_self(
+// bit32-LABEL: half @f16_as_self(
+// bit64-LABEL: half @f16_as_self(
+// x86-sse-LABEL: <2 x i8> @f16_as_self(
+// x86-nosse-LABEL: i16 @f16_as_self(
 #[no_mangle]
 pub fn f16_as_self(a: f16) -> f16 {
-    // other-CHECK: ret half %{{.+}}
-    // x86-CHECK: bitcast half
-    // x86-CHECK: ret i16
+    // bit32-CHECK: ret half %{{.+}}
+    // bit64-CHECK: ret half %{{.+}}
+    // x86-sse-CHECK: bitcast half
+    // x86-nosse-CHECK: bitcast half
+    // x86-sse-CHECK: ret i16
+    // x86-nosse-CHECK: ret i16
     a as f16
 }
 
-// x86-LABEL: i32 @f16_as_f32(
+// x86-sse-LABEL: <4 x i8> @f16_as_f32(
+// x86-nosse-LABEL: i32 @f16_as_f32(
 // bit32-LABEL: float @f16_as_f32(
 // bit64-LABEL: float @f16_as_f32(
 #[no_mangle]
@@ -163,7 +174,8 @@ pub fn f16_as_f32(a: f16) -> f32 {
     a as f32
 }
 
-// x86-LABEL: void @f16_as_f64(
+// x86-sse-LABEL: <8 x i8> @f16_as_f64(
+// x86-nosse-LABEL: void @f16_as_f64({{.*}}sret([8 x i8])
 // bit32-LABEL: double @f16_as_f64(
 // bit64-LABEL: double @f16_as_f64(
 #[no_mangle]
@@ -172,7 +184,8 @@ pub fn f16_as_f64(a: f16) -> f64 {
     a as f64
 }
 
-// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: <16 x i8> @f16_as_f128(
+// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
 // bit64-LABEL: fp128 @f16_as_f128(
 #[no_mangle]
@@ -231,7 +244,8 @@ pub fn f16_as_u64(a: f16) -> u64 {
     a as u64
 }
 
-// x86-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
 // bit64-LABEL: i128 @f16_as_u128(
 #[no_mangle]
@@ -267,7 +281,8 @@ pub fn f16_as_i64(a: f16) -> i64 {
     a as i64
 }
 
-// x86-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
+// x86-sse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
+// x86-nosse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
 // bit32-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
 // bit64-LABEL: i128 @f16_as_i128(
 #[no_mangle]
diff --git a/tests/codegen/intrinsics/transmute-x64.rs b/tests/codegen/intrinsics/transmute-x64.rs
index fe68f183667..be45e4db90f 100644
--- a/tests/codegen/intrinsics/transmute-x64.rs
+++ b/tests/codegen/intrinsics/transmute-x64.rs
@@ -6,15 +6,6 @@
 use std::arch::x86_64::{__m128, __m128i, __m256i};
 use std::mem::transmute;
 
-// CHECK-LABEL: @check_sse_float_to_int(
-#[no_mangle]
-pub unsafe fn check_sse_float_to_int(x: __m128) -> __m128i {
-    // CHECK-NOT: alloca
-    // CHECK: %0 = load <4 x float>, ptr %x, align 16
-    // CHECK: store <4 x float> %0, ptr %_0, align 16
-    transmute(x)
-}
-
 // CHECK-LABEL: @check_sse_pair_to_avx(
 #[no_mangle]
 pub unsafe fn check_sse_pair_to_avx(x: (__m128i, __m128i)) -> __m256i {
diff --git a/tests/codegen/issues/issue-32031.rs b/tests/codegen/issues/issue-32031.rs
index 4d6895166f1..559e8d947fb 100644
--- a/tests/codegen/issues/issue-32031.rs
+++ b/tests/codegen/issues/issue-32031.rs
@@ -1,7 +1,7 @@
 //@ compile-flags: -C no-prepopulate-passes -Copt-level=0
 // 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack.
 //@ revisions: x86 other
-//@[x86] only-x86
+//@[x86] only-rustc_abi-x86-sse2
 //@[other] ignore-x86
 
 #![crate_type = "lib"]
@@ -10,7 +10,7 @@
 pub struct F32(f32);
 
 // other: define{{.*}}float @add_newtype_f32(float %a, float %b)
-// x86: define{{.*}}i32 @add_newtype_f32(float %a, float %b)
+// x86: define{{.*}}<4 x i8> @add_newtype_f32(float %a, float %b)
 #[inline(never)]
 #[no_mangle]
 pub fn add_newtype_f32(a: F32, b: F32) -> F32 {
@@ -21,7 +21,7 @@ pub fn add_newtype_f32(a: F32, b: F32) -> F32 {
 pub struct F64(f64);
 
 // other: define{{.*}}double @add_newtype_f64(double %a, double %b)
-// x86: define{{.*}}void @add_newtype_f64(ptr{{.*}}sret([8 x i8]){{.*}}%_0, double %a, double %b)
+// x86: define{{.*}}<8 x i8> @add_newtype_f64(double %a, double %b)
 #[inline(never)]
 #[no_mangle]
 pub fn add_newtype_f64(a: F64, b: F64) -> F64 {
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs
index 75f989d6e12..0d21d510557 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs
@@ -1,5 +1,14 @@
 //
 //@ compile-flags: -C no-prepopulate-passes
+// LLVM IR isn't very portable and the one tested here depends on the ABI
+// which is different between x86 (where we use SSE registers) and others.
+// `x86-64` and `x86-32-sse2` are identical, but compiletest does not support
+// taking the union of multiple `only` annotations.
+//@ revisions: x86-64 x86-32-sse2 other
+//@[x86-64] only-x86_64
+//@[x86-32-sse2] only-rustc_abi-x86-sse2
+//@[other] ignore-rustc_abi-x86-sse2
+//@[other] ignore-x86_64
 
 #![crate_type = "lib"]
 #![allow(non_camel_case_types)]
@@ -38,7 +47,9 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> {
 #[no_mangle]
 pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> {
     // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
-    // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
+    // x86-32: ret <4 x float> %[[VAL:.+]]
+    // x86-64: ret <4 x float> %[[VAL:.+]]
+    // other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
     unsafe { std::mem::transmute(x) }
 }
 
@@ -53,6 +64,8 @@ pub fn build_array_t(x: [f32; 4]) -> T {
 #[no_mangle]
 pub fn build_array_transmute_t(x: [f32; 4]) -> T {
     // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
-    // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
+    // x86-32: ret <4 x float> %[[VAL:.+]]
+    // x86-64: ret <4 x float> %[[VAL:.+]]
+    // other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
     unsafe { std::mem::transmute(x) }
 }
diff --git a/tests/codegen/simd/packed-simd.rs b/tests/codegen/simd/packed-simd.rs
index 1df09c96e6c..a27d5e3af45 100644
--- a/tests/codegen/simd/packed-simd.rs
+++ b/tests/codegen/simd/packed-simd.rs
@@ -1,4 +1,5 @@
 //@ revisions:opt3 noopt
+//@ only-x86_64
 //@[opt3] compile-flags: -Copt-level=3
 //@[noopt] compile-flags: -Cno-prepopulate-passes
 
@@ -14,14 +15,14 @@ use core::{mem, ptr};
 
 #[repr(simd, packed)]
 #[derive(Copy, Clone)]
-pub struct Simd<T, const N: usize>([T; N]);
+pub struct PackedSimd<T, const N: usize>([T; N]);
 
 #[repr(simd)]
 #[derive(Copy, Clone)]
 pub struct FullSimd<T, const N: usize>([T; N]);
 
 // non-powers-of-two have padding and need to be expanded to full vectors
-fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
+fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> {
     unsafe {
         let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit();
         ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
@@ -29,18 +30,16 @@ fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
     }
 }
 
-// CHECK-LABEL: square_packed_full
-// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
-// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
+// CHECK-LABEL: define <3 x float> @square_packed_full(ptr{{[a-z_ ]*}} align 4 {{[^,]*}})
 #[no_mangle]
-pub fn square_packed_full(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
-    // CHECK-NEXT: start
-    // noopt: alloca [[RET_TYPE]], [[RET_ALIGN]]
-    // CHECK: load <3 x float>
+pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> {
+    // The unoptimized version of this is not very interesting to check
+    // since `load` does not get inlined.
+    // opt3-NEXT: start:
+    // opt3-NEXT: load <3 x float>
     let x = load(x);
-    // CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
-    // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
-    // CHECK-NEXT: ret void
+    // opt3-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
+    // opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]]
     unsafe { intrinsics::simd_mul(x, x) }
 }
 
@@ -48,7 +47,7 @@ pub fn square_packed_full(x: Simd<f32, 3>) -> FullSimd<f32, 3> {
 // CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align 4]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
 // CHECK-SAME: ptr{{[a-z_ ]*}} align 4
 #[no_mangle]
-pub fn square_packed(x: Simd<f32, 3>) -> Simd<f32, 3> {
+pub fn square_packed(x: PackedSimd<f32, 3>) -> PackedSimd<f32, 3> {
     // CHECK-NEXT: start
     // CHECK-NEXT: load <3 x float>
     // noopt-NEXT: load <3 x float>
diff --git a/tests/codegen/union-abi.rs b/tests/codegen/union-abi.rs
index 92d40d8ac14..16022fad8af 100644
--- a/tests/codegen/union-abi.rs
+++ b/tests/codegen/union-abi.rs
@@ -2,8 +2,11 @@
 //@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
 // 32-bit x86 returns `f32` differently to avoid the x87 stack.
 // 32-bit systems will return 128bit values using a return area pointer.
-//@ revisions: x86 bit32 bit64
-//@[x86] only-x86
+//@ revisions: x86-sse x86-nosse bit32 bit64
+//@[x86-sse] only-x86
+//@[x86-sse] only-rustc_abi-x86-sse2
+//@[x86-nosse] only-x86
+//@[x86-nosse] ignore-rustc_abi-x86-sse2
 //@[bit32] ignore-x86
 //@[bit32] only-32bit
 //@[bit64] ignore-x86
@@ -75,7 +78,8 @@ pub union UnionF32 {
     a: f32,
 }
 
-// x86: define {{(dso_local )?}}i32 @test_UnionF32(float %_1)
+// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32(float %_1)
+// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32(float %_1)
 // bit32: define {{(dso_local )?}}float @test_UnionF32(float %_1)
 // bit64: define {{(dso_local )?}}float @test_UnionF32(float %_1)
 #[no_mangle]
@@ -88,7 +92,8 @@ pub union UnionF32F32 {
     b: f32,
 }
 
-// x86: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1)
+// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32F32(float %_1)
+// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1)
 // bit32: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
 // bit64: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
 #[no_mangle]
@@ -110,7 +115,8 @@ pub fn test_UnionF32U32(_: UnionF32U32) -> UnionF32U32 {
 pub union UnionU128 {
     a: u128,
 }
-// x86: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
+// x86-sse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
+// x86-nosse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
 // bit32: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
 // bit64: define {{(dso_local )?}}i128 @test_UnionU128(i128 %_1)
 #[no_mangle]
author	Ralf Jung <post@ralfj.de>	2025-01-29 12:23:15 +0100
committer	Ralf Jung <post@ralfj.de>	2025-02-18 16:11:41 +0100
commit	803feb5dc679556ba2d93fe616d1883d7ebc7775 (patch)
tree	92c7d2c3167497ef56c18b26bd94c0afc6ec61a7 /tests/codegen
parent	de91711756133f11f7861dd099c001eaff0aafaa (diff)
download	rust-803feb5dc679556ba2d93fe616d1883d7ebc7775.tar.gz rust-803feb5dc679556ba2d93fe616d1883d7ebc7775.zip