diff options
| author | bors <bors@rust-lang.org> | 2025-07-26 22:45:18 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-07-26 22:45:18 +0000 |
| commit | 283a0746a244a88503fed61844f44df925ccdbb6 (patch) | |
| tree | ed63ef3f5b1d2f81899e9a3a436ba7f9a0aac451 /tests | |
| parent | ce5fdd7d42aba9a2925692e11af2bd39cf37798a (diff) | |
| parent | b7e025cfb64094d8672c752a9fffb12e9bc79567 (diff) | |
| download | rust-283a0746a244a88503fed61844f44df925ccdbb6.tar.gz rust-283a0746a244a88503fed61844f44df925ccdbb6.zip | |
Auto merge of #143860 - scottmcm:transmute-always-rvalue, r=WaffleLapkin
Let `codegen_transmute_operand` just handle everything When combined with rust-lang/rust#143720, this means `rvalue_creates_operand` can just return `true` for *every* `Rvalue`. (A future PR could consider removing it, though just letting it optimize out is fine for now.) It's nicer anyway, IMHO, because it avoids needing the layout checks to be consistent in the two places, and thus is an overall reduction in code. Plus it's a more helpful building block when used in other places this way. (TBH, it probably would have been better to have it this way the whole time, but I clearly didn't understand `rvalue_creates_operand` when I originally wrote rust-lang/rust#109843.)
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/codegen-llvm/intrinsics/transmute-simd.rs | 176 | ||||
| -rw-r--r-- | tests/codegen-llvm/intrinsics/transmute.rs | 26 |
2 files changed, 192 insertions, 10 deletions
diff --git a/tests/codegen-llvm/intrinsics/transmute-simd.rs b/tests/codegen-llvm/intrinsics/transmute-simd.rs new file mode 100644 index 00000000000..e34b27e1333 --- /dev/null +++ b/tests/codegen-llvm/intrinsics/transmute-simd.rs @@ -0,0 +1,176 @@ +//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes +//@ only-64bit (so I don't need to worry about usize) +//@ revisions: aarch64 x86_64 +//@ [aarch64] only-aarch64 +//@ [aarch64] compile-flags: -C target-feature=+neon +//@ [x86_64] only-x86_64 +//@ [x86_64] compile-flags: -C target-feature=+sse2 + +#![crate_type = "lib"] +#![feature(core_intrinsics)] +#![feature(portable_simd)] + +use std::intrinsics::transmute; +use std::simd::{Simd, f32x4, f64x2, i32x4, i64x2}; +type PtrX2 = Simd<*const (), 2>; + +// These tests use the "C" ABI so that the vectors in question aren't passed and +// returned though memory (as they are in the "Rust" ABI), which greatly +// simplifies seeing the difference between the in-operand cases vs the ones +// that fallback to just using the `LocalKind::Memory` path. + +// CHECK-LABEL: <2 x i64> @mixed_int(<4 x i32> %v) +#[no_mangle] +pub extern "C" fn mixed_int(v: i32x4) -> i64x2 { + // CHECK-NOT: alloca + // CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x i64> + // CHECK: ret <2 x i64> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x double> @mixed_float(<4 x float> %v) +#[no_mangle] +pub extern "C" fn mixed_float(v: f32x4) -> f64x2 { + // CHECK-NOT: alloca + // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x double> + // CHECK: ret <2 x double> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <4 x i32> @float_int_same_lanes(<4 x float> %v) +#[no_mangle] +pub extern "C" fn float_int_same_lanes(v: f32x4) -> i32x4 { + // CHECK-NOT: alloca + // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <4 x i32> + // CHECK: ret <4 x i32> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x double> @int_float_same_lanes(<2 x i64> %v) +#[no_mangle] +pub extern "C" fn int_float_same_lanes(v: i64x2) -> f64x2 { + // CHECK-NOT: alloca + // CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <2 x double> + // CHECK: ret <2 x double> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x i64> @float_int_widen(<4 x float> %v) +#[no_mangle] +pub extern "C" fn float_int_widen(v: f32x4) -> i64x2 { + // CHECK-NOT: alloca + // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x i64> + // CHECK: ret <2 x i64> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x double> @int_float_widen(<4 x i32> %v) +#[no_mangle] +pub extern "C" fn int_float_widen(v: i32x4) -> f64x2 { + // CHECK-NOT: alloca + // CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x double> + // CHECK: ret <2 x double> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <4 x i32> @float_int_narrow(<2 x double> %v) +#[no_mangle] +pub extern "C" fn float_int_narrow(v: f64x2) -> i32x4 { + // CHECK-NOT: alloca + // CHECK: %[[RET:.+]] = bitcast <2 x double> %v to <4 x i32> + // CHECK: ret <4 x i32> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <4 x float> @int_float_narrow(<2 x i64> %v) +#[no_mangle] +pub extern "C" fn int_float_narrow(v: i64x2) -> f32x4 { + // CHECK-NOT: alloca + // CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <4 x float> + // CHECK: ret <4 x float> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x ptr> @float_ptr_same_lanes(<2 x double> %v) +#[no_mangle] +pub extern "C" fn float_ptr_same_lanes(v: f64x2) -> PtrX2 { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = alloca [16 x i8] + // CHECK-NOT: alloca + // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]]) + // CHECK: store <2 x double> %v, ptr %[[TEMP]] + // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]] + // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]]) + // CHECK: ret <2 x ptr> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x double> @ptr_float_same_lanes(<2 x ptr> %v) +#[no_mangle] +pub extern "C" fn ptr_float_same_lanes(v: PtrX2) -> f64x2 { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = alloca [16 x i8] + // CHECK-NOT: alloca + // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]]) + // CHECK: store <2 x ptr> %v, ptr %[[TEMP]] + // CHECK: %[[RET:.+]] = load <2 x double>, ptr %[[TEMP]] + // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]]) + // CHECK: ret <2 x double> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x ptr> @int_ptr_same_lanes(<2 x i64> %v) +#[no_mangle] +pub extern "C" fn int_ptr_same_lanes(v: i64x2) -> PtrX2 { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = alloca [16 x i8] + // CHECK-NOT: alloca + // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]]) + // CHECK: store <2 x i64> %v, ptr %[[TEMP]] + // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]] + // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]]) + // CHECK: ret <2 x ptr> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x i64> @ptr_int_same_lanes(<2 x ptr> %v) +#[no_mangle] +pub extern "C" fn ptr_int_same_lanes(v: PtrX2) -> i64x2 { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = alloca [16 x i8] + // CHECK-NOT: alloca + // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]]) + // CHECK: store <2 x ptr> %v, ptr %[[TEMP]] + // CHECK: %[[RET:.+]] = load <2 x i64>, ptr %[[TEMP]] + // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]]) + // CHECK: ret <2 x i64> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x ptr> @float_ptr_widen(<4 x float> %v) +#[no_mangle] +pub extern "C" fn float_ptr_widen(v: f32x4) -> PtrX2 { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = alloca [16 x i8] + // CHECK-NOT: alloca + // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]]) + // CHECK: store <4 x float> %v, ptr %[[TEMP]] + // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]] + // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]]) + // CHECK: ret <2 x ptr> %[[RET]] + unsafe { transmute(v) } +} + +// CHECK-LABEL: <2 x ptr> @int_ptr_widen(<4 x i32> %v) +#[no_mangle] +pub extern "C" fn int_ptr_widen(v: i32x4) -> PtrX2 { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = alloca [16 x i8] + // CHECK-NOT: alloca + // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]]) + // CHECK: store <4 x i32> %v, ptr %[[TEMP]] + // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]] + // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]]) + // CHECK: ret <2 x ptr> %[[RET]] + unsafe { transmute(v) } +} diff --git a/tests/codegen-llvm/intrinsics/transmute.rs b/tests/codegen-llvm/intrinsics/transmute.rs index c9a1cd58af3..91cff38773d 100644 --- a/tests/codegen-llvm/intrinsics/transmute.rs +++ b/tests/codegen-llvm/intrinsics/transmute.rs @@ -191,22 +191,28 @@ pub unsafe fn check_byte_from_bool(x: bool) -> u8 { // CHECK-LABEL: @check_to_pair( #[no_mangle] pub unsafe fn check_to_pair(x: u64) -> Option<i32> { - // CHECK: %_0 = alloca [8 x i8], align 4 - // CHECK: store i64 %x, ptr %_0, align 4 + // CHECK: %[[TEMP:.+]] = alloca [8 x i8], align 8 + // CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[TEMP]]) + // CHECK: store i64 %x, ptr %[[TEMP]], align 8 + // CHECK: %[[PAIR0:.+]] = load i32, ptr %[[TEMP]], align 8 + // CHECK: %[[PAIR1P:.+]] = getelementptr inbounds i8, ptr %[[TEMP]], i64 4 + // CHECK: %[[PAIR1:.+]] = load i32, ptr %[[PAIR1P]], align 4 + // CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[TEMP]]) + // CHECK: insertvalue {{.+}}, i32 %[[PAIR0]], 0 + // CHECK: insertvalue {{.+}}, i32 %[[PAIR1]], 1 transmute(x) } // CHECK-LABEL: @check_from_pair( #[no_mangle] pub unsafe fn check_from_pair(x: Option<i32>) -> u64 { - // The two arguments are of types that are only 4-aligned, but they're - // immediates so we can write using the destination alloca's alignment. - const { assert!(std::mem::align_of::<Option<i32>>() == 4) }; - - // CHECK: %_0 = alloca [8 x i8], align 8 - // CHECK: store i32 %x.0, ptr %_0, align 8 - // CHECK: store i32 %x.1, ptr %0, align 4 - // CHECK: %[[R:.+]] = load i64, ptr %_0, align 8 + // CHECK: %[[TEMP:.+]] = alloca [8 x i8], align 8 + // CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[TEMP]]) + // CHECK: store i32 %x.0, ptr %[[TEMP]], align 8 + // CHECK: %[[PAIR1P:.+]] = getelementptr inbounds i8, ptr %[[TEMP]], i64 4 + // CHECK: store i32 %x.1, ptr %[[PAIR1P]], align 4 + // CHECK: %[[R:.+]] = load i64, ptr %[[TEMP]], align 8 + // CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[TEMP]]) // CHECK: ret i64 %[[R]] transmute(x) } |
