diff options
| author | Ralf Jung <post@ralfj.de> | 2025-05-20 21:53:57 +0200 |
|---|---|---|
| committer | Ralf Jung <post@ralfj.de> | 2025-06-04 08:38:49 +0200 |
| commit | 321db85fb4fa58fc03e86a3a7a901ff7fb624583 (patch) | |
| tree | 4f82d0cde65b6758217ed881f4ca8d34b028ad4a | |
| parent | 792fc2b033aea7ea7b766e38bdc40f7d6bdce8c3 (diff) | |
| download | rust-321db85fb4fa58fc03e86a3a7a901ff7fb624583.tar.gz rust-321db85fb4fa58fc03e86a3a7a901ff7fb624583.zip | |
x86 (32/64): go back to passing SIMD vectors by-ptr
| -rw-r--r-- | compiler/rustc_target/src/callconv/mod.rs | 27 | ||||
| -rw-r--r-- | tests/codegen/abi-x86-sse.rs | 5 | ||||
| -rw-r--r-- | tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs | 20 | ||||
| -rw-r--r-- | tests/codegen/simd/packed-simd.rs | 16 |
4 files changed, 22 insertions, 46 deletions
diff --git a/compiler/rustc_target/src/callconv/mod.rs b/compiler/rustc_target/src/callconv/mod.rs index d2e49cea647..dcb79cce759 100644 --- a/compiler/rustc_target/src/callconv/mod.rs +++ b/compiler/rustc_target/src/callconv/mod.rs @@ -7,7 +7,7 @@ use rustc_abi::{ use rustc_macros::HashStable_Generic; pub use crate::spec::AbiMap; -use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi}; +use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi}; mod aarch64; mod amdgpu; @@ -696,24 +696,6 @@ impl<'a, Ty> FnAbi<'a, Ty> { _ => {} }; - // Decides whether we can pass the given SIMD argument via `PassMode::Direct`. - // May only return `true` if the target will always pass those arguments the same way, - // no matter what the user does with `-Ctarget-feature`! In other words, whatever - // target features are required to pass a SIMD value in registers must be listed in - // the `abi_required_features` for the current target and ABI. - let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch { - // On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up - // to 128-bit-sized vectors. - "x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128, - "x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => { - // FIXME once https://github.com/bytecodealliance/wasmtime/issues/10254 is fixed - // accept vectors up to 128bit rather than vectors of exactly 128bit. - arg.layout.size.bits() == 128 - } - // So far, we haven't implemented this logic for any other target. - _ => false, - }; - for (arg_idx, arg) in self .args .iter_mut() @@ -813,9 +795,10 @@ impl<'a, Ty> FnAbi<'a, Ty> { // target feature sets. Some more information about this // issue can be found in #44367. // - // Note that the intrinsic ABI is exempt here as those are not - // real functions anyway, and the backend expects very specific types. - if spec.simd_types_indirect && !can_pass_simd_directly(arg) { + // We *could* do better in some cases, e.g. on x86_64 targets where SSE2 is + // required. However, it turns out that that makes LLVM worse at optimizing this + // code, so we pass things indirectly even there. See #139029 for more on that. + if spec.simd_types_indirect { arg.make_indirect(); } } diff --git a/tests/codegen/abi-x86-sse.rs b/tests/codegen/abi-x86-sse.rs index 837bf6134b0..90757e601af 100644 --- a/tests/codegen/abi-x86-sse.rs +++ b/tests/codegen/abi-x86-sse.rs @@ -27,8 +27,9 @@ trait Copy {} #[repr(simd)] pub struct Sse([f32; 4]); -// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}}) -// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}}) +// FIXME: due to #139029 we are passing them all indirectly. +// x86-64: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}}) +// x86-32: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}}) // x86-32-nosse: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}}) #[no_mangle] pub fn sse_id(x: Sse) -> Sse { diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs index d3853361de9..977bf3379b7 100644 --- a/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs +++ b/tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs @@ -1,14 +1,8 @@ // //@ compile-flags: -C no-prepopulate-passes -// LLVM IR isn't very portable and the one tested here depends on the ABI -// which is different between x86 (where we use SSE registers) and others. -// `x86-64` and `x86-32-sse2` are identical, but compiletest does not support -// taking the union of multiple `only` annotations. -//@ revisions: x86-64 x86-32-sse2 other -//@[x86-64] only-x86_64 -//@[x86-32-sse2] only-rustc_abi-x86-sse2 -//@[other] ignore-rustc_abi-x86-sse2 -//@[other] ignore-x86_64 +// 32bit MSVC does not align things properly so we suppress high alignment annotations (#112480) +//@ ignore-i686-pc-windows-msvc +//@ ignore-i686-pc-windows-gnu #![crate_type = "lib"] #![allow(non_camel_case_types)] @@ -47,9 +41,7 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> { #[no_mangle] pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> { // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]] - // x86-32: ret <4 x float> %[[VAL:.+]] - // x86-64: ret <4 x float> %[[VAL:.+]] - // other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] + // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] unsafe { std::mem::transmute(x) } } @@ -64,8 +56,6 @@ pub fn build_array_t(x: [f32; 4]) -> T { #[no_mangle] pub fn build_array_transmute_t(x: [f32; 4]) -> T { // CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]] - // x86-32: ret <4 x float> %[[VAL:.+]] - // x86-64: ret <4 x float> %[[VAL:.+]] - // other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] + // CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]] unsafe { std::mem::transmute(x) } } diff --git a/tests/codegen/simd/packed-simd.rs b/tests/codegen/simd/packed-simd.rs index a27d5e3af45..73e0d29d7d6 100644 --- a/tests/codegen/simd/packed-simd.rs +++ b/tests/codegen/simd/packed-simd.rs @@ -30,16 +30,18 @@ fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> { } } -// CHECK-LABEL: define <3 x float> @square_packed_full(ptr{{[a-z_ ]*}} align 4 {{[^,]*}}) +// CHECK-LABEL: square_packed_full +// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]] +// CHECK-SAME: ptr{{[a-z_ ]*}} align 4 #[no_mangle] pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> { - // The unoptimized version of this is not very interesting to check - // since `load` does not get inlined. - // opt3-NEXT: start: - // opt3-NEXT: load <3 x float> + // CHECK-NEXT: start + // noopt: alloca [[RET_TYPE]], [[RET_ALIGN]] + // CHECK: load <3 x float> let x = load(x); - // opt3-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float> - // opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]] + // CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float> + // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]] + // CHECK-NEXT: ret void unsafe { intrinsics::simd_mul(x, x) } } |
