diff options
| author | Ralf Jung <post@ralfj.de> | 2025-01-29 12:23:15 +0100 |
|---|---|---|
| committer | Ralf Jung <post@ralfj.de> | 2025-02-18 16:11:41 +0100 |
| commit | 803feb5dc679556ba2d93fe616d1883d7ebc7775 (patch) | |
| tree | 92c7d2c3167497ef56c18b26bd94c0afc6ec61a7 /tests/codegen | |
| parent | de91711756133f11f7861dd099c001eaff0aafaa (diff) | |
| download | rust-803feb5dc679556ba2d93fe616d1883d7ebc7775.tar.gz rust-803feb5dc679556ba2d93fe616d1883d7ebc7775.zip | |
x86-sse2 ABI: use SSE registers for floats and SIMD
Diffstat (limited to 'tests/codegen')
| -rw-r--r-- | tests/codegen/abi-x86-sse.rs | 36 | ||||
| -rw-r--r-- | tests/codegen/float/f128.rs | 82 | ||||
| -rw-r--r-- | tests/codegen/float/f16.rs | 43 | ||||
| -rw-r--r-- | tests/codegen/intrinsics/transmute-x64.rs | 9 | ||||
| -rw-r--r-- | tests/codegen/issues/issue-32031.rs | 6 | ||||
| -rw-r--r-- | tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs | 17 | ||||
| -rw-r--r-- | tests/codegen/simd/packed-simd.rs | 25 | ||||
| -rw-r--r-- | tests/codegen/union-abi.rs | 16 |
8 files changed, 161 insertions, 73 deletions
diff --git a/tests/codegen/abi-x86-sse.rs b/tests/codegen/abi-x86-sse.rs new file mode 100644 index 00000000000..837bf6134b0 --- /dev/null +++ b/tests/codegen/abi-x86-sse.rs @@ -0,0 +1,36 @@ +//@ compile-flags: -Z merge-functions=disabled + +//@ revisions: x86-64 +//@[x86-64] compile-flags: --target x86_64-unknown-linux-gnu +//@[x86-64] needs-llvm-components: x86 + +//@ revisions: x86-32 +//@[x86-32] compile-flags: --target i686-unknown-linux-gnu +//@[x86-32] needs-llvm-components: x86 + +//@ revisions: x86-32-nosse +//@[x86-32-nosse] compile-flags: --target i586-unknown-linux-gnu +//@[x86-32-nosse] needs-llvm-components: x86 + +#![feature(no_core, lang_items, rustc_attrs, repr_simd)] +#![no_core] +#![crate_type = "lib"] + +#[lang = "sized"] +trait Sized {} + +#[lang = "copy"] +trait Copy {} + +// Ensure this type is passed without ptr indirection on targets that +// require SSE2. +#[repr(simd)] +pub struct Sse([f32; 4]); + +// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}}) +// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}}) +// x86-32-nosse: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}}) +#[no_mangle] +pub fn sse_id(x: Sse) -> Sse { + x +} diff --git a/tests/codegen/float/f128.rs b/tests/codegen/float/f128.rs index 562a8e6c9e9..d87bab1172a 100644 --- a/tests/codegen/float/f128.rs +++ b/tests/codegen/float/f128.rs @@ -1,8 +1,11 @@ // 32-bit x86 returns float types differently to avoid the x87 stack. // 32-bit systems will return 128bit values using a return area pointer. // Emscripten aligns f128 to 8 bytes, not 16. -//@ revisions: x86 bit32 bit64 emscripten -//@[x86] only-x86 +//@ revisions: x86-sse x86-nosse bit32 bit64 emscripten +//@[x86-sse] only-x86 +//@[x86-sse] only-rustc_abi-x86-sse2 +//@[x86-nosse] only-x86 +//@[x86-nosse] ignore-rustc_abi-x86-sse2 //@[bit32] ignore-x86 //@[bit32] ignore-emscripten //@[bit32] only-32bit @@ -60,7 +63,8 @@ pub fn f128_le(a: f128, b: f128) -> bool { a <= b } -// x86-LABEL: void @f128_neg({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_neg({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_neg(fp128 // bit32-LABEL: void @f128_neg({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_neg( // emscripten-LABEL: void @f128_neg({{.*}}sret([16 x i8]) @@ -70,7 +74,8 @@ pub fn f128_neg(a: f128) -> f128 { -a } -// x86-LABEL: void @f128_add({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_add({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_add(fp128 // bit32-LABEL: void @f128_add({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_add( // emscripten-LABEL: void @f128_add({{.*}}sret([16 x i8]) @@ -80,7 +85,8 @@ pub fn f128_add(a: f128, b: f128) -> f128 { a + b } -// x86-LABEL: void @f128_sub({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_sub({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_sub(fp128 // bit32-LABEL: void @f128_sub({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_sub( // emscripten-LABEL: void @f128_sub({{.*}}sret([16 x i8]) @@ -90,7 +96,8 @@ pub fn f128_sub(a: f128, b: f128) -> f128 { a - b } -// x86-LABEL: void @f128_mul({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_mul({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_mul(fp128 // bit32-LABEL: void @f128_mul({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_mul( // emscripten-LABEL: void @f128_mul({{.*}}sret([16 x i8]) @@ -100,7 +107,8 @@ pub fn f128_mul(a: f128, b: f128) -> f128 { a * b } -// x86-LABEL: void @f128_div({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_div({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_div(fp128 // bit32-LABEL: void @f128_div({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_div( // emscripten-LABEL: void @f128_div({{.*}}sret([16 x i8]) @@ -110,7 +118,8 @@ pub fn f128_div(a: f128, b: f128) -> f128 { a / b } -// x86-LABEL: void @f128_rem({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_rem({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_rem(fp128 // bit32-LABEL: void @f128_rem({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_rem( // emscripten-LABEL: void @f128_rem({{.*}}sret([16 x i8]) @@ -162,7 +171,8 @@ pub fn f128_rem_assign(a: &mut f128, b: f128) { /* float to float conversions */ -// x86-LABEL: i16 @f128_as_f16( +// x86-sse-LABEL: <2 x i8> @f128_as_f16( +// x86-nosse-LABEL: i16 @f128_as_f16( // bits32-LABEL: half @f128_as_f16( // bits64-LABEL: half @f128_as_f16( #[no_mangle] @@ -171,7 +181,8 @@ pub fn f128_as_f16(a: f128) -> f16 { a as f16 } -// x86-LABEL: i32 @f128_as_f32( +// x86-sse-LABEL: <4 x i8> @f128_as_f32( +// x86-nosse-LABEL: i32 @f128_as_f32( // bit32-LABEL: float @f128_as_f32( // bit64-LABEL: float @f128_as_f32( // emscripten-LABEL: float @f128_as_f32( @@ -181,7 +192,8 @@ pub fn f128_as_f32(a: f128) -> f32 { a as f32 } -// x86-LABEL: void @f128_as_f64( +// x86-sse-LABEL: <8 x i8> @f128_as_f64( +// x86-nosse-LABEL: void @f128_as_f64({{.*}}sret([8 x i8]) // bit32-LABEL: double @f128_as_f64( // bit64-LABEL: double @f128_as_f64( // emscripten-LABEL: double @f128_as_f64( @@ -191,7 +203,8 @@ pub fn f128_as_f64(a: f128) -> f64 { a as f64 } -// x86-LABEL: void @f128_as_self({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f128_as_self( +// x86-nosse-LABEL: void @f128_as_self({{.*}}sret([16 x i8]) // bit32-LABEL: void @f128_as_self({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f128_as_self( // emscripten-LABEL: void @f128_as_self({{.*}}sret([16 x i8]) @@ -204,7 +217,8 @@ pub fn f128_as_self(a: f128) -> f128 { a as f128 } -// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f16_as_f128( +// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f16_as_f128( // emscripten-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) @@ -214,7 +228,8 @@ pub fn f16_as_f128(a: f16) -> f128 { a as f128 } -// x86-LABEL: void @f32_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f32_as_f128( +// x86-nosse-LABEL: void @f32_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f32_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f32_as_f128( // emscripten-LABEL: void @f32_as_f128({{.*}}sret([16 x i8]) @@ -224,7 +239,8 @@ pub fn f32_as_f128(a: f32) -> f128 { a as f128 } -// x86-LABEL: void @f64_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f64_as_f128( +// x86-nosse-LABEL: void @f64_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f64_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f64_as_f128( // emscripten-LABEL: void @f64_as_f128({{.*}}sret([16 x i8]) @@ -263,7 +279,8 @@ pub fn f128_as_u64(a: f128) -> u64 { a as u64 } -// x86-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) // bit64-LABEL: i128 @f128_as_u128( // emscripten-LABEL: void @f128_as_u128({{.*}}sret([16 x i8]) @@ -300,7 +317,8 @@ pub fn f128_as_i64(a: f128) -> i64 { a as i64 } -// x86-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) // bit64-LABEL: i128 @f128_as_i128( // emscripten-LABEL: void @f128_as_i128({{.*}}sret([16 x i8]) @@ -312,7 +330,8 @@ pub fn f128_as_i128(a: f128) -> i128 { /* int to float conversions */ -// x86-LABEL: void @u8_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u8_as_f128( +// x86-nosse-LABEL: void @u8_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u8_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u8_as_f128( // emscripten-LABEL: void @u8_as_f128({{.*}}sret([16 x i8]) @@ -322,7 +341,8 @@ pub fn u8_as_f128(a: u8) -> f128 { a as f128 } -// x86-LABEL: void @u16_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u16_as_f128( +// x86-nosse-LABEL: void @u16_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u16_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u16_as_f128( // emscripten-LABEL: void @u16_as_f128({{.*}}sret([16 x i8]) @@ -332,7 +352,8 @@ pub fn u16_as_f128(a: u16) -> f128 { a as f128 } -// x86-LABEL: void @u32_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u32_as_f128( +// x86-nosse-LABEL: void @u32_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u32_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u32_as_f128( // emscripten-LABEL: void @u32_as_f128({{.*}}sret([16 x i8]) @@ -342,7 +363,8 @@ pub fn u32_as_f128(a: u32) -> f128 { a as f128 } -// x86-LABEL: void @u64_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u64_as_f128( +// x86-nosse-LABEL: void @u64_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u64_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u64_as_f128( // emscripten-LABEL: void @u64_as_f128({{.*}}sret([16 x i8]) @@ -352,7 +374,8 @@ pub fn u64_as_f128(a: u64) -> f128 { a as f128 } -// x86-LABEL: void @u128_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @u128_as_f128( +// x86-nosse-LABEL: void @u128_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @u128_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @u128_as_f128( // emscripten-LABEL: void @u128_as_f128({{.*}}sret([16 x i8]) @@ -362,7 +385,8 @@ pub fn u128_as_f128(a: u128) -> f128 { a as f128 } -// x86-LABEL: void @i8_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i8_as_f128( +// x86-nosse-LABEL: void @i8_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i8_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i8_as_f128( // emscripten-LABEL: void @i8_as_f128({{.*}}sret([16 x i8]) @@ -372,7 +396,8 @@ pub fn i8_as_f128(a: i8) -> f128 { a as f128 } -// x86-LABEL: void @i16_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i16_as_f128( +// x86-nosse-LABEL: void @i16_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i16_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i16_as_f128( // emscripten-LABEL: void @i16_as_f128({{.*}}sret([16 x i8]) @@ -382,7 +407,8 @@ pub fn i16_as_f128(a: i16) -> f128 { a as f128 } -// x86-LABEL: void @i32_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i32_as_f128( +// x86-nosse-LABEL: void @i32_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i32_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i32_as_f128( // emscripten-LABEL: void @i32_as_f128({{.*}}sret([16 x i8]) @@ -392,7 +418,8 @@ pub fn i32_as_f128(a: i32) -> f128 { a as f128 } -// x86-LABEL: void @i64_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i64_as_f128( +// x86-nosse-LABEL: void @i64_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i64_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i64_as_f128( // emscripten-LABEL: void @i64_as_f128({{.*}}sret([16 x i8]) @@ -402,7 +429,8 @@ pub fn i64_as_f128(a: i64) -> f128 { a as f128 } -// x86-LABEL: void @i128_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @i128_as_f128( +// x86-nosse-LABEL: void @i128_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @i128_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @i128_as_f128( // emscripten-LABEL: void @i128_as_f128({{.*}}sret([16 x i8]) diff --git a/tests/codegen/float/f16.rs b/tests/codegen/float/f16.rs index 5c3a5893b9d..0c40606ad8a 100644 --- a/tests/codegen/float/f16.rs +++ b/tests/codegen/float/f16.rs @@ -1,7 +1,10 @@ // 32-bit x86 returns float types differently to avoid the x87 stack. // 32-bit systems will return 128bit values using a return area pointer. -//@ revisions: x86 bit32 bit64 -//@[x86] only-x86 +//@ revisions: x86-sse x86-nosse bit32 bit64 +//@[x86-sse] only-x86 +//@[x86-sse] only-rustc_abi-x86-sse2 +//@[x86-nosse] only-x86 +//@[x86-nosse] ignore-rustc_abi-x86-sse2 //@[bit32] ignore-x86 //@[bit32] only-32bit //@[bit64] ignore-x86 @@ -59,8 +62,10 @@ pub fn f16_le(a: f16, b: f16) -> bool { } // This is where we check the argument and return ABI for f16. -// other-LABEL: half @f16_neg(half -// x86-LABEL: i16 @f16_neg(half +// bit32-LABEL: half @f16_neg(half +// bit64-LABEL: half @f16_neg(half +// x86-sse-LABEL: <2 x i8> @f16_neg(half +// x86-nosse-LABEL: i16 @f16_neg(half #[no_mangle] pub fn f16_neg(a: f16) -> f16 { // CHECK: fneg half %{{.+}} @@ -144,17 +149,23 @@ pub fn f16_rem_assign(a: &mut f16, b: f16) { /* float to float conversions */ -// other-LABEL: half @f16_as_self( -// x86-LABEL: i16 @f16_as_self( +// bit32-LABEL: half @f16_as_self( +// bit64-LABEL: half @f16_as_self( +// x86-sse-LABEL: <2 x i8> @f16_as_self( +// x86-nosse-LABEL: i16 @f16_as_self( #[no_mangle] pub fn f16_as_self(a: f16) -> f16 { - // other-CHECK: ret half %{{.+}} - // x86-CHECK: bitcast half - // x86-CHECK: ret i16 + // bit32-CHECK: ret half %{{.+}} + // bit64-CHECK: ret half %{{.+}} + // x86-sse-CHECK: bitcast half + // x86-nosse-CHECK: bitcast half + // x86-sse-CHECK: ret i16 + // x86-nosse-CHECK: ret i16 a as f16 } -// x86-LABEL: i32 @f16_as_f32( +// x86-sse-LABEL: <4 x i8> @f16_as_f32( +// x86-nosse-LABEL: i32 @f16_as_f32( // bit32-LABEL: float @f16_as_f32( // bit64-LABEL: float @f16_as_f32( #[no_mangle] @@ -163,7 +174,8 @@ pub fn f16_as_f32(a: f16) -> f32 { a as f32 } -// x86-LABEL: void @f16_as_f64( +// x86-sse-LABEL: <8 x i8> @f16_as_f64( +// x86-nosse-LABEL: void @f16_as_f64({{.*}}sret([8 x i8]) // bit32-LABEL: double @f16_as_f64( // bit64-LABEL: double @f16_as_f64( #[no_mangle] @@ -172,7 +184,8 @@ pub fn f16_as_f64(a: f16) -> f64 { a as f64 } -// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: <16 x i8> @f16_as_f128( +// x86-nosse-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8]) // bit64-LABEL: fp128 @f16_as_f128( #[no_mangle] @@ -231,7 +244,8 @@ pub fn f16_as_u64(a: f16) -> u64 { a as u64 } -// x86-LABEL: void @f16_as_u128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f16_as_u128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f16_as_u128({{.*}}sret([16 x i8]) // bit64-LABEL: i128 @f16_as_u128( #[no_mangle] @@ -267,7 +281,8 @@ pub fn f16_as_i64(a: f16) -> i64 { a as i64 } -// x86-LABEL: void @f16_as_i128({{.*}}sret([16 x i8]) +// x86-sse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8]) +// x86-nosse-LABEL: void @f16_as_i128({{.*}}sret([16 x i8]) // bit32-LABEL: void @f16_as_i128({{.*}}sret([16 x i8]) // bit64-LABEL: i128 @f16_as_i128( #[no_mangle] diff --git a/tests/codegen/intrinsics/transmute-x64.rs b/tests/codegen/intrinsics/transmute-x64.rs index fe68f183667..be45e4db90f 100644 --- a/tests/codegen/intrinsics/transmute-x64.rs +++ b/tests/codegen/intrinsics/transmute-x64.rs @@ -6,15 +6,6 @@ use std::arch::x86_64::{__m128, __m128i, __m256i}; use std::mem::transmute; -// CHECK-LABEL: @check_sse_float_to_int( -#[no_mangle] -pub unsafe fn check_sse_float_to_int(x: __m128) -> __m128i { - // CHECK-NOT: alloca - // CHECK: %0 = load <4 x float>, ptr %x, align 16 - // CHECK: store <4 x float> %0, ptr %_0, align 16 - transmute(x) -} - // CHECK-LABEL: @check_sse_pair_to_avx( #[no_mangle] pub unsafe fn check_sse_pair_to_avx(x: (__m128i, __m128i)) -> __m256i { diff --git a/tests/codegen/issues/issue-32031.rs b/tests/codegen/issues/issue-32031.rs index 4d6895166f1..559e8d947fb 100644 --- a/tests/codegen/issues/issue-32031.rs +++ b/tests/codegen/issues/issue-32031.rs @@ -1,7 +1,7 @@ //@ compile-flags: -C no-prepopulate-passes -Copt-level=0 // 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack. //@ revisions: x86 other -//@[x86] only-x86 +//@[x86] only-rustc_abi-x86-sse2 //@[other] ignore-x86 #![crate_type = "lib"] @@ -10,7 +10,7 @@ pub struct F32(f32); // other: define{{.*}}float @add_newtype_f32(float %a, float %b) -// x86: define{{.*}}i32 @add_newtype_f32(float %a, float %b) +// x86: define{{.*}}<4 x i8> @add_newtype_f32(float %a, float %b) #[inline(never)] #[no_mangle] pub fn add_newtype_f32(a: F32, b: F32) -> F32 { @@ -21,7 +21,7 @@ pub fn add_newtype_f32(a: F32, b: F32) -> F32 { pub struct F64(f64); // other: define{{.*}}double @add_newtype_f64(double %a, double %b) -// x86: define{{.*}}void @add_newtype_f64(ptr{{.*}}sret([8 x i8]){{.*}}%_0, double %a, double %b) +// x86: define{{.*}}<8 x i8> @add_newtype_f64(double %a, double %b) #[inline(never)] #[no_mangle] pub fn add_newtype_f64(a: F64, b: F64) -> F64 { diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs index 75f989d6e12..0d21d510557 100644 --- a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs +++ b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs @@ -1,5 +1,14 @@ // //@ compile-flags: -C no-prepopulate-passes +// LLVM IR isn't very portable and the one tested here depends on the ABI +// which is different between x86 (where we use SSE registers) and others. +// `x86-64` and `x86-32-sse2` are identical, but compiletest does not support +// taking the union of multiple `only` annotations. +//@ revisions: x86-64 x86-32-sse2 other +//@[x86-64] only-x86_64 +//@[x86-32-sse2] only-rustc_abi-x86-sse2 +//@[other] ignore-rustc_abi-x86-sse2 +//@[other] ignore-x86_64 #![crate_type = "lib"] #![allow(non_camel_case_types)] @@ -38,7 +47,9 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> { #[no_mangle] pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> { // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]] - // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] + // x86-32: ret <4 x float> %[[VAL:.+]] + // x86-64: ret <4 x float> %[[VAL:.+]] + // other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] unsafe { std::mem::transmute(x) } } @@ -53,6 +64,8 @@ pub fn build_array_t(x: [f32; 4]) -> T { #[no_mangle] pub fn build_array_transmute_t(x: [f32; 4]) -> T { // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]] - // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] + // x86-32: ret <4 x float> %[[VAL:.+]] + // x86-64: ret <4 x float> %[[VAL:.+]] + // other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] unsafe { std::mem::transmute(x) } } diff --git a/tests/codegen/simd/packed-simd.rs b/tests/codegen/simd/packed-simd.rs index 1df09c96e6c..a27d5e3af45 100644 --- a/tests/codegen/simd/packed-simd.rs +++ b/tests/codegen/simd/packed-simd.rs @@ -1,4 +1,5 @@ //@ revisions:opt3 noopt +//@ only-x86_64 //@[opt3] compile-flags: -Copt-level=3 //@[noopt] compile-flags: -Cno-prepopulate-passes @@ -14,14 +15,14 @@ use core::{mem, ptr}; #[repr(simd, packed)] #[derive(Copy, Clone)] -pub struct Simd<T, const N: usize>([T; N]); +pub struct PackedSimd<T, const N: usize>([T; N]); #[repr(simd)] #[derive(Copy, Clone)] pub struct FullSimd<T, const N: usize>([T; N]); // non-powers-of-two have padding and need to be expanded to full vectors -fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> { +fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> { unsafe { let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit(); ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1); @@ -29,18 +30,16 @@ fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> { } } -// CHECK-LABEL: square_packed_full -// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]] -// CHECK-SAME: ptr{{[a-z_ ]*}} align 4 +// CHECK-LABEL: define <3 x float> @square_packed_full(ptr{{[a-z_ ]*}} align 4 {{[^,]*}}) #[no_mangle] -pub fn square_packed_full(x: Simd<f32, 3>) -> FullSimd<f32, 3> { - // CHECK-NEXT: start - // noopt: alloca [[RET_TYPE]], [[RET_ALIGN]] - // CHECK: load <3 x float> +pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> { + // The unoptimized version of this is not very interesting to check + // since `load` does not get inlined. + // opt3-NEXT: start: + // opt3-NEXT: load <3 x float> let x = load(x); - // CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float> - // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]] - // CHECK-NEXT: ret void + // opt3-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float> + // opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]] unsafe { intrinsics::simd_mul(x, x) } } @@ -48,7 +47,7 @@ pub fn square_packed_full(x: Simd<f32, 3>) -> FullSimd<f32, 3> { // CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align 4]]{{[^%]*}} [[RET_VREG:%[_0-9]*]] // CHECK-SAME: ptr{{[a-z_ ]*}} align 4 #[no_mangle] -pub fn square_packed(x: Simd<f32, 3>) -> Simd<f32, 3> { +pub fn square_packed(x: PackedSimd<f32, 3>) -> PackedSimd<f32, 3> { // CHECK-NEXT: start // CHECK-NEXT: load <3 x float> // noopt-NEXT: load <3 x float> diff --git a/tests/codegen/union-abi.rs b/tests/codegen/union-abi.rs index 92d40d8ac14..16022fad8af 100644 --- a/tests/codegen/union-abi.rs +++ b/tests/codegen/union-abi.rs @@ -2,8 +2,11 @@ //@ compile-flags: -Copt-level=3 -C no-prepopulate-passes // 32-bit x86 returns `f32` differently to avoid the x87 stack. // 32-bit systems will return 128bit values using a return area pointer. -//@ revisions: x86 bit32 bit64 -//@[x86] only-x86 +//@ revisions: x86-sse x86-nosse bit32 bit64 +//@[x86-sse] only-x86 +//@[x86-sse] only-rustc_abi-x86-sse2 +//@[x86-nosse] only-x86 +//@[x86-nosse] ignore-rustc_abi-x86-sse2 //@[bit32] ignore-x86 //@[bit32] only-32bit //@[bit64] ignore-x86 @@ -75,7 +78,8 @@ pub union UnionF32 { a: f32, } -// x86: define {{(dso_local )?}}i32 @test_UnionF32(float %_1) +// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32(float %_1) +// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32(float %_1) // bit32: define {{(dso_local )?}}float @test_UnionF32(float %_1) // bit64: define {{(dso_local )?}}float @test_UnionF32(float %_1) #[no_mangle] @@ -88,7 +92,8 @@ pub union UnionF32F32 { b: f32, } -// x86: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1) +// x86-sse: define {{(dso_local )?}}<4 x i8> @test_UnionF32F32(float %_1) +// x86-nosse: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1) // bit32: define {{(dso_local )?}}float @test_UnionF32F32(float %_1) // bit64: define {{(dso_local )?}}float @test_UnionF32F32(float %_1) #[no_mangle] @@ -110,7 +115,8 @@ pub fn test_UnionF32U32(_: UnionF32U32) -> UnionF32U32 { pub union UnionU128 { a: u128, } -// x86: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1) +// x86-sse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1) +// x86-nosse: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1) // bit32: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1) // bit64: define {{(dso_local )?}}i128 @test_UnionU128(i128 %_1) #[no_mangle] |
