re-enable direct `bitcast`s for Int/Float vector transmutes (but not ones involving pointers)

author: Scott McMurray <scottmcm@users.noreply.github.com> 2025-07-18 01:33:32 -0700
committer: Scott McMurray <scottmcm@users.noreply.github.com> 2025-07-23 08:32:46 -0700
commit: ea0c7788c049b608f2f497ec3a4b4117c359523c (patch)
tree: a25eaba152d4aae7c2b1b25db4c0d3155d5c219b
parent: 231dddde3e686c0903a28ce605b8c5d505a2ba21 (diff)
download: rust-ea0c7788c049b608f2f497ec3a4b4117c359523c.tar.gz
rust-ea0c7788c049b608f2f497ec3a4b4117c359523c.zip
2 files changed, 197 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 25b7447379a..4c080de2b31 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -253,6 +253,19 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             return OperandValue::poison(bx, cast);
         }
 
+        // To or from pointers takes different methods, so we use this to restrict
+        // the SimdVector case to types which can be `bitcast` between each other.
+        #[inline]
+        fn vector_can_bitcast(x: abi::Scalar) -> bool {
+            matches!(
+                x,
+                abi::Scalar::Initialized {
+                    value: abi::Primitive::Int(..) | abi::Primitive::Float(..),
+                    ..
+                }
+            )
+        }
+
         let cx = bx.cx();
         match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
             _ if cast.is_zst() => OperandValue::ZeroSized,
@@ -270,6 +283,14 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar))
             }
             (
+                OperandValue::Immediate(imm),
+                abi::BackendRepr::SimdVector { element: from_scalar, .. },
+                abi::BackendRepr::SimdVector { element: to_scalar, .. },
+            ) if vector_can_bitcast(from_scalar) && vector_can_bitcast(to_scalar) => {
+                let to_backend_ty = bx.cx().immediate_backend_type(cast);
+                OperandValue::Immediate(bx.bitcast(imm, to_backend_ty))
+            }
+            (
                 OperandValue::Pair(imm_a, imm_b),
                 abi::BackendRepr::ScalarPair(in_a, in_b),
                 abi::BackendRepr::ScalarPair(out_a, out_b),
diff --git a/tests/codegen-llvm/intrinsics/transmute-simd.rs b/tests/codegen-llvm/intrinsics/transmute-simd.rs
new file mode 100644
index 00000000000..e34b27e1333
--- /dev/null
+++ b/tests/codegen-llvm/intrinsics/transmute-simd.rs
@@ -0,0 +1,176 @@
+//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
+//@ only-64bit (so I don't need to worry about usize)
+//@ revisions: aarch64 x86_64
+//@ [aarch64] only-aarch64
+//@ [aarch64] compile-flags: -C target-feature=+neon
+//@ [x86_64] only-x86_64
+//@ [x86_64] compile-flags: -C target-feature=+sse2
+
+#![crate_type = "lib"]
+#![feature(core_intrinsics)]
+#![feature(portable_simd)]
+
+use std::intrinsics::transmute;
+use std::simd::{Simd, f32x4, f64x2, i32x4, i64x2};
+type PtrX2 = Simd<*const (), 2>;
+
+// These tests use the "C" ABI so that the vectors in question aren't passed and
+// returned though memory (as they are in the "Rust" ABI), which greatly
+// simplifies seeing the difference between the in-operand cases vs the ones
+// that fallback to just using the `LocalKind::Memory` path.
+
+// CHECK-LABEL: <2 x i64> @mixed_int(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn mixed_int(v: i32x4) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x i64>
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @mixed_float(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn mixed_float(v: f32x4) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x i32> @float_int_same_lanes(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_int_same_lanes(v: f32x4) -> i32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <4 x i32>
+    // CHECK: ret <4 x i32> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @int_float_same_lanes(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_float_same_lanes(v: i64x2) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x i64> @float_int_widen(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_int_widen(v: f32x4) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x i64>
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @int_float_widen(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn int_float_widen(v: i32x4) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x i32> @float_int_narrow(<2 x double> %v)
+#[no_mangle]
+pub extern "C" fn float_int_narrow(v: f64x2) -> i32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x double> %v to <4 x i32>
+    // CHECK: ret <4 x i32> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x float> @int_float_narrow(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_float_narrow(v: i64x2) -> f32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <4 x float>
+    // CHECK: ret <4 x float> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @float_ptr_same_lanes(<2 x double> %v)
+#[no_mangle]
+pub extern "C" fn float_ptr_same_lanes(v: f64x2) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x double> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @ptr_float_same_lanes(<2 x ptr> %v)
+#[no_mangle]
+pub extern "C" fn ptr_float_same_lanes(v: PtrX2) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x double>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @int_ptr_same_lanes(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_ptr_same_lanes(v: i64x2) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x i64> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x i64> @ptr_int_same_lanes(<2 x ptr> %v)
+#[no_mangle]
+pub extern "C" fn ptr_int_same_lanes(v: PtrX2) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x i64>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @float_ptr_widen(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_ptr_widen(v: f32x4) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <4 x float> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @int_ptr_widen(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn int_ptr_widen(v: i32x4) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <4 x i32> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
author	Scott McMurray <scottmcm@users.noreply.github.com>	2025-07-18 01:33:32 -0700
committer	Scott McMurray <scottmcm@users.noreply.github.com>	2025-07-23 08:32:46 -0700
commit	ea0c7788c049b608f2f497ec3a4b4117c359523c (patch)
tree	a25eaba152d4aae7c2b1b25db4c0d3155d5c219b
parent	231dddde3e686c0903a28ce605b8c5d505a2ba21 (diff)
download	rust-ea0c7788c049b608f2f497ec3a4b4117c359523c.tar.gz rust-ea0c7788c049b608f2f497ec3a4b4117c359523c.zip