-rw-r--r--  compiler/rustc_codegen_ssa/src/mir/analyze.rs    |   5
-rw-r--r--  compiler/rustc_codegen_ssa/src/mir/operand.rs    |   7
-rw-r--r--  compiler/rustc_codegen_ssa/src/mir/rvalue.rs     | 150
-rw-r--r--  tests/codegen-llvm/intrinsics/transmute-simd.rs  | 176
-rw-r--r--  tests/codegen-llvm/intrinsics/transmute.rs       |  26
5 files changed, 258 insertions(+), 106 deletions(-)
diff --git a/compiler/rustc_codegen_ssa/src/mir/analyze.rs b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
index 6d6465dd798..b9a6d3af445 100644
--- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
@@ -170,11 +170,6 @@ impl<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> Visitor<'tcx> for LocalAnalyzer
 
         if let Some(local) = place.as_local() {
             self.define(local, DefLocation::Assignment(location));
-            if self.locals[local] != LocalKind::Memory {
-                if !self.fx.rvalue_creates_operand(rvalue) {
-                    self.locals[local] = LocalKind::Memory;
-                }
-            }
         } else {
             self.visit_place(place, PlaceContext::MutatingUse(MutatingUseContext::Store), location);
         }
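
With the analyzer no longer consulting `rvalue_creates_operand`, a local assigned from a transmute is not demoted to `LocalKind::Memory` up front; `codegen_transmute_operand` now decides case by case. As a minimal sketch (the function name `bits_of` is illustrative, and this assumes a nightly toolchain with `portable_simd`), this is the kind of same-size SIMD transmute that can now stay as an SSA value:

    #![feature(portable_simd)]
    use std::simd::{f32x4, i32x4};

    // Same-size vector transmute: with this change the local can stay in
    // SSA form and lower to a single LLVM `bitcast`, where the removed
    // analyzer check previously forced it into a stack slot.
    pub fn bits_of(v: f32x4) -> i32x4 {
        unsafe { std::mem::transmute(v) }
    }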
diff --git a/compiler/rustc_codegen_ssa/src/mir/operand.rs b/compiler/rustc_codegen_ssa/src/mir/operand.rs
index 8e308aac769..d2c71070f92 100644
--- a/compiler/rustc_codegen_ssa/src/mir/operand.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -335,13 +335,6 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
 
         let val = if field.is_zst() {
             OperandValue::ZeroSized
-        } else if let BackendRepr::SimdVector { .. } = self.layout.backend_repr {
-            // codegen_transmute_operand doesn't support SIMD, but since the previous
-            // check handled ZSTs, the only possible field access into something SIMD
-            // is to the `non_1zst_field` that's the same SIMD. (Other things, even
-            // just padding, would change the wrapper's representation type.)
-            assert_eq!(field.size, self.layout.size);
-            self.val
         } else if field.size == self.layout.size {
             assert_eq!(offset.bytes(), 0);
             fx.codegen_transmute_operand(bx, *self, field)
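
The deleted branch special-cased extracting a field out of a wrapper whose layout is the same SIMD vector as the field; since `codegen_transmute_operand` now handles `SimdVector` layouts, the generic same-size branch below it covers that case. A hypothetical wrapper of that shape (`Wrapper` and `unwrap` are illustrative names, again assuming nightly `portable_simd`):

    #![feature(portable_simd)]
    use std::simd::f32x4;

    // The only non-1-ZST field is the vector itself, so reading `w.0` is
    // a same-size layout change now served by `codegen_transmute_operand`.
    #[repr(transparent)]
    pub struct Wrapper(pub f32x4);

    pub fn unwrap(w: Wrapper) -> f32x4 {
        w.0
    }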
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index a5759b79be4..8a67b8d6e5f 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -2,12 +2,12 @@ use rustc_abi::{self as abi, FIRST_VARIANT};
 use rustc_middle::ty::adjustment::PointerCoercion;
 use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
-use rustc_middle::{bug, mir};
+use rustc_middle::{bug, mir, span_bug};
 use rustc_session::config::OptLevel;
 use tracing::{debug, instrument};
 
 use super::operand::{OperandRef, OperandRefBuilder, OperandValue};
-use super::place::{PlaceRef, codegen_tag_value};
+use super::place::{PlaceRef, PlaceValue, codegen_tag_value};
 use super::{FunctionCx, LocalRef};
 use crate::common::{IntPredicate, TypeKind};
 use crate::traits::*;
@@ -180,7 +180,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             }
 
             _ => {
-                assert!(self.rvalue_creates_operand(rvalue));
                 let temp = self.codegen_rvalue_operand(bx, rvalue);
                 temp.val.store(bx, dest);
             }
@@ -218,17 +217,26 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
 
     /// Transmutes an `OperandValue` to another `OperandValue`.
     ///
-    /// This is supported only for cases where [`Self::rvalue_creates_operand`]
-    /// returns `true`, and will ICE otherwise. (In particular, anything that
-    /// would need to `alloca` in order to return a `PlaceValue` will ICE,
-    /// expecting those to go via [`Self::codegen_transmute`] instead where
-    /// the destination place is already allocated.)
+    /// This is supported for all cases where the `cast` type is SSA,
+    /// but for non-ZSTs with [`abi::BackendRepr::Memory`] it ICEs.
     pub(crate) fn codegen_transmute_operand(
         &mut self,
         bx: &mut Bx,
         operand: OperandRef<'tcx, Bx::Value>,
         cast: TyAndLayout<'tcx>,
     ) -> OperandValue<Bx::Value> {
+        if let abi::BackendRepr::Memory { .. } = cast.backend_repr
+            && !cast.is_zst()
+        {
+            span_bug!(self.mir.span, "Use `codegen_transmute` to transmute to {cast:?}");
+        }
+
+        // `Layout` is interned, so we can do a cheap check for things that are
+        // exactly the same and thus don't need any handling.
+        if abi::Layout::eq(&operand.layout.layout, &cast.layout) {
+            return operand.val;
+        }
+
         // Check for transmutes that are always UB.
         if operand.layout.size != cast.size
             || operand.layout.is_uninhabited()
@@ -241,11 +249,22 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             return OperandValue::poison(bx, cast);
         }
 
+        // Transmuting to or from pointers takes different operations, so this
+        // restricts the SimdVector case to types that can be `bitcast` directly.
+        #[inline]
+        fn vector_can_bitcast(x: abi::Scalar) -> bool {
+            matches!(
+                x,
+                abi::Scalar::Initialized {
+                    value: abi::Primitive::Int(..) | abi::Primitive::Float(..),
+                    ..
+                }
+            )
+        }
+
+        let cx = bx.cx();
         match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
             _ if cast.is_zst() => OperandValue::ZeroSized,
-            (_, _, abi::BackendRepr::Memory { .. }) => {
-                bug!("Cannot `codegen_transmute_operand` to non-ZST memory-ABI output {cast:?}");
-            }
             (OperandValue::Ref(source_place_val), abi::BackendRepr::Memory { .. }, _) => {
                 assert_eq!(source_place_val.llextra, None);
                 // The existing alignment is part of `source_place_val`,
@@ -256,16 +275,46 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 OperandValue::Immediate(imm),
                 abi::BackendRepr::Scalar(from_scalar),
                 abi::BackendRepr::Scalar(to_scalar),
-            ) => OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar)),
+            ) if from_scalar.size(cx) == to_scalar.size(cx) => {
+                OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar))
+            }
+            (
+                OperandValue::Immediate(imm),
+                abi::BackendRepr::SimdVector { element: from_scalar, .. },
+                abi::BackendRepr::SimdVector { element: to_scalar, .. },
+            ) if vector_can_bitcast(from_scalar) && vector_can_bitcast(to_scalar) => {
+                let to_backend_ty = bx.cx().immediate_backend_type(cast);
+                OperandValue::Immediate(bx.bitcast(imm, to_backend_ty))
+            }
             (
                 OperandValue::Pair(imm_a, imm_b),
                 abi::BackendRepr::ScalarPair(in_a, in_b),
                 abi::BackendRepr::ScalarPair(out_a, out_b),
-            ) => OperandValue::Pair(
-                transmute_scalar(bx, imm_a, in_a, out_a),
-                transmute_scalar(bx, imm_b, in_b, out_b),
-            ),
-            _ => bug!("Cannot `codegen_transmute_operand` {operand:?} to {cast:?}"),
+            ) if in_a.size(cx) == out_a.size(cx) && in_b.size(cx) == out_b.size(cx) => {
+                OperandValue::Pair(
+                    transmute_scalar(bx, imm_a, in_a, out_a),
+                    transmute_scalar(bx, imm_b, in_b, out_b),
+                )
+            }
+            _ => {
+                // For any other potentially tricky cases, make a temporary instead.
+                // If anything else wants the target local to be in memory this won't
+                // be hit, as `codegen_transmute` will get called directly. Thus this
+                // is only for places where everything else wants the operand form,
+                // so it's not worth making those places read it from memory.
+                //
+                // Notably, Scalar ⇌ ScalarPair cases go here to avoid padding
+                // and endianness issues, as do SimdVector ones to avoid worrying
+                // about things like f32x8 ⇌ ptrx4 that would need multiple steps.
+                let align = Ord::max(operand.layout.align.abi, cast.align.abi);
+                let size = Ord::max(operand.layout.size, cast.size);
+                let temp = PlaceValue::alloca(bx, size, align);
+                bx.lifetime_start(temp.llval, size);
+                operand.val.store(bx, temp.with_type(operand.layout));
+                let val = bx.load_operand(temp.with_type(cast)).val;
+                bx.lifetime_end(temp.llval, size);
+                val
+            }
         }
     }
 
@@ -326,8 +375,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         bx: &mut Bx,
         rvalue: &mir::Rvalue<'tcx>,
     ) -> OperandRef<'tcx, Bx::Value> {
-        assert!(self.rvalue_creates_operand(rvalue), "cannot codegen {rvalue:?} to operand",);
-
         match *rvalue {
             mir::Rvalue::Cast(ref kind, ref source, mir_cast_ty) => {
                 let operand = self.codegen_operand(bx, source);
@@ -653,8 +700,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 let ty = self.monomorphize(ty);
                 let layout = self.cx.layout_of(ty);
 
-                // `rvalue_creates_operand` has arranged that we only get here if
-                // we can build the aggregate immediate from the field immediates.
                 let mut builder = OperandRefBuilder::new(layout);
                 for (field_idx, field) in fields.iter_enumerated() {
                     let op = self.codegen_operand(bx, field);
@@ -955,69 +1000,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
 
         OperandValue::Pair(val, of)
     }
-
-    /// Returns `true` if the `rvalue` can be computed into an [`OperandRef`],
-    /// rather than needing a full `PlaceRef` for the assignment destination.
-    ///
-    /// This is used by the [`super::analyze`] code to decide which MIR locals
-    /// can stay as SSA values (as opposed to generating `alloca` slots for them).
-    /// As such, some paths here return `true` even where the specific rvalue
-    /// will not actually take the operand path because the result type is such
-    /// that it always gets an `alloca`, but where it's not worth re-checking the
-    /// layout in this code when the right thing will happen anyway.
-    pub(crate) fn rvalue_creates_operand(&self, rvalue: &mir::Rvalue<'tcx>) -> bool {
-        match *rvalue {
-            mir::Rvalue::Cast(mir::CastKind::Transmute, ref operand, cast_ty) => {
-                let operand_ty = operand.ty(self.mir, self.cx.tcx());
-                let cast_layout = self.cx.layout_of(self.monomorphize(cast_ty));
-                let operand_layout = self.cx.layout_of(self.monomorphize(operand_ty));
-                match (operand_layout.backend_repr, cast_layout.backend_repr) {
-                    // When the output will be in memory anyway, just use its place
-                    // (instead of the operand path) unless it's the trivial ZST case.
-                    (_, abi::BackendRepr::Memory { .. }) => cast_layout.is_zst(),
-
-                    // Otherwise (for a non-memory output) if the input is memory
-                    // then we can just read the value from the place.
-                    (abi::BackendRepr::Memory { .. }, _) => true,
-
-                    // When we have scalar immediates, we can only convert things
-                    // where the sizes match, to avoid endianness questions.
-                    (abi::BackendRepr::Scalar(a), abi::BackendRepr::Scalar(b)) =>
-                        a.size(self.cx) == b.size(self.cx),
-                    (abi::BackendRepr::ScalarPair(a0, a1), abi::BackendRepr::ScalarPair(b0, b1)) =>
-                        a0.size(self.cx) == b0.size(self.cx) && a1.size(self.cx) == b1.size(self.cx),
-
-                    // Mixing Scalars and ScalarPairs can get quite complicated when
-                    // padding and undef get involved, so leave that to the memory path.
-                    (abi::BackendRepr::Scalar(_), abi::BackendRepr::ScalarPair(_, _)) |
-                    (abi::BackendRepr::ScalarPair(_, _), abi::BackendRepr::Scalar(_)) => false,
-
-                    // SIMD vectors aren't worth the trouble of dealing with complex
-                    // cases like from vectors of f32 to vectors of pointers or
-                    // from fat pointers to vectors of u16. (See #143194 #110021 ...)
-                    (abi::BackendRepr::SimdVector { .. }, _) | (_, abi::BackendRepr::SimdVector { .. }) => false,
-                }
-            }
-            mir::Rvalue::Ref(..) |
-            mir::Rvalue::CopyForDeref(..) |
-            mir::Rvalue::RawPtr(..) |
-            mir::Rvalue::Len(..) |
-            mir::Rvalue::Cast(..) | // (*)
-            mir::Rvalue::ShallowInitBox(..) | // (*)
-            mir::Rvalue::BinaryOp(..) |
-            mir::Rvalue::UnaryOp(..) |
-            mir::Rvalue::Discriminant(..) |
-            mir::Rvalue::NullaryOp(..) |
-            mir::Rvalue::ThreadLocalRef(_) |
-            mir::Rvalue::Use(..) |
-            mir::Rvalue::Repeat(..) | // (*)
-            mir::Rvalue::Aggregate(..) | // (*)
-            mir::Rvalue::WrapUnsafeBinder(..) => // (*)
-                true,
-        }
-
-        // (*) this is only true if the type is suitable
-    }
 }
 
 /// Transmutes a single scalar value `imm` from `from_scalar` to `to_scalar`.
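
The new fallback arm handles the Scalar ⇌ ScalarPair and pointer-vector conversions by spilling to a lifetime-bounded temporary instead of rejecting the operand path outright. The `check_to_pair` test below shows the resulting IR; at the source level the case looks like this (a standalone version of that test, with `split` as an illustrative name):

    // Scalar -> ScalarPair: the value round-trips through a short-lived
    // alloca (store one i64, load two i32s) rather than the destination
    // local being kept in memory for its whole lifetime.
    pub unsafe fn split(x: u64) -> Option<i32> {
        std::mem::transmute(x)
    }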
diff --git a/tests/codegen-llvm/intrinsics/transmute-simd.rs b/tests/codegen-llvm/intrinsics/transmute-simd.rs
new file mode 100644
index 00000000000..e34b27e1333
--- /dev/null
+++ b/tests/codegen-llvm/intrinsics/transmute-simd.rs
@@ -0,0 +1,176 @@
+//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
+//@ only-64bit (so I don't need to worry about usize)
+//@ revisions: aarch64 x86_64
+//@ [aarch64] only-aarch64
+//@ [aarch64] compile-flags: -C target-feature=+neon
+//@ [x86_64] only-x86_64
+//@ [x86_64] compile-flags: -C target-feature=+sse2
+
+#![crate_type = "lib"]
+#![feature(core_intrinsics)]
+#![feature(portable_simd)]
+
+use std::intrinsics::transmute;
+use std::simd::{Simd, f32x4, f64x2, i32x4, i64x2};
+type PtrX2 = Simd<*const (), 2>;
+
+// These tests use the "C" ABI so that the vectors in question aren't passed and
+// returned through memory (as they are in the "Rust" ABI), which greatly
+// simplifies seeing the difference between the in-operand cases and the ones
+// that fall back to using the `LocalKind::Memory` path.
+
+// CHECK-LABEL: <2 x i64> @mixed_int(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn mixed_int(v: i32x4) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x i64>
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @mixed_float(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn mixed_float(v: f32x4) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x i32> @float_int_same_lanes(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_int_same_lanes(v: f32x4) -> i32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <4 x i32>
+    // CHECK: ret <4 x i32> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @int_float_same_lanes(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_float_same_lanes(v: i64x2) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x i64> @float_int_widen(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_int_widen(v: f32x4) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x i64>
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @int_float_widen(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn int_float_widen(v: i32x4) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x i32> @float_int_narrow(<2 x double> %v)
+#[no_mangle]
+pub extern "C" fn float_int_narrow(v: f64x2) -> i32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x double> %v to <4 x i32>
+    // CHECK: ret <4 x i32> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x float> @int_float_narrow(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_float_narrow(v: i64x2) -> f32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <4 x float>
+    // CHECK: ret <4 x float> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @float_ptr_same_lanes(<2 x double> %v)
+#[no_mangle]
+pub extern "C" fn float_ptr_same_lanes(v: f64x2) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x double> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @ptr_float_same_lanes(<2 x ptr> %v)
+#[no_mangle]
+pub extern "C" fn ptr_float_same_lanes(v: PtrX2) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x double>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @int_ptr_same_lanes(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_ptr_same_lanes(v: i64x2) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x i64> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x i64> @ptr_int_same_lanes(<2 x ptr> %v)
+#[no_mangle]
+pub extern "C" fn ptr_int_same_lanes(v: PtrX2) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x i64>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @float_ptr_widen(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_ptr_widen(v: f32x4) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <4 x float> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @int_ptr_widen(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn int_ptr_widen(v: i32x4) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <4 x i32> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
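
The pointer-element cases above take the store/load temporary because LLVM's `bitcast` is not defined between pointer and non-pointer vectors; only initialized int/float elements satisfy `vector_can_bitcast`. A self-contained toy sketch of that predicate (the `Element` enum and `can_bitcast` are stand-ins, not compiler types):

    // Mirrors the shape of `vector_can_bitcast` from rvalue.rs: int and
    // float elements may be `bitcast` directly; pointer elements must go
    // through the temporary exercised by the tests above.
    #[derive(Clone, Copy, Debug)]
    enum Element {
        Int,
        Float,
        Ptr,
    }

    fn can_bitcast(e: Element) -> bool {
        matches!(e, Element::Int | Element::Float)
    }

    fn main() {
        assert!(can_bitcast(Element::Float)); // f32x4 -> i64x2: direct bitcast
        assert!(!can_bitcast(Element::Ptr)); // f64x2 -> Simd<*const (), 2>: via memory
    }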
diff --git a/tests/codegen-llvm/intrinsics/transmute.rs b/tests/codegen-llvm/intrinsics/transmute.rs
index c9a1cd58af3..91cff38773d 100644
--- a/tests/codegen-llvm/intrinsics/transmute.rs
+++ b/tests/codegen-llvm/intrinsics/transmute.rs
@@ -191,22 +191,28 @@ pub unsafe fn check_byte_from_bool(x: bool) -> u8 {
 // CHECK-LABEL: @check_to_pair(
 #[no_mangle]
 pub unsafe fn check_to_pair(x: u64) -> Option<i32> {
-    // CHECK: %_0 = alloca [8 x i8], align 4
-    // CHECK: store i64 %x, ptr %_0, align 4
+    // CHECK: %[[TEMP:.+]] = alloca [8 x i8], align 8
+    // CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[TEMP]])
+    // CHECK: store i64 %x, ptr %[[TEMP]], align 8
+    // CHECK: %[[PAIR0:.+]] = load i32, ptr %[[TEMP]], align 8
+    // CHECK: %[[PAIR1P:.+]] = getelementptr inbounds i8, ptr %[[TEMP]], i64 4
+    // CHECK: %[[PAIR1:.+]] = load i32, ptr %[[PAIR1P]], align 4
+    // CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[TEMP]])
+    // CHECK: insertvalue {{.+}}, i32 %[[PAIR0]], 0
+    // CHECK: insertvalue {{.+}}, i32 %[[PAIR1]], 1
     transmute(x)
 }
 
 // CHECK-LABEL: @check_from_pair(
 #[no_mangle]
 pub unsafe fn check_from_pair(x: Option<i32>) -> u64 {
-    // The two arguments are of types that are only 4-aligned, but they're
-    // immediates so we can write using the destination alloca's alignment.
-    const { assert!(std::mem::align_of::<Option<i32>>() == 4) };
-
-    // CHECK: %_0 = alloca [8 x i8], align 8
-    // CHECK: store i32 %x.0, ptr %_0, align 8
-    // CHECK: store i32 %x.1, ptr %0, align 4
-    // CHECK: %[[R:.+]] = load i64, ptr %_0, align 8
+    // CHECK: %[[TEMP:.+]] = alloca [8 x i8], align 8
+    // CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[TEMP]])
+    // CHECK: store i32 %x.0, ptr %[[TEMP]], align 8
+    // CHECK: %[[PAIR1P:.+]] = getelementptr inbounds i8, ptr %[[TEMP]], i64 4
+    // CHECK: store i32 %x.1, ptr %[[PAIR1P]], align 4
+    // CHECK: %[[R:.+]] = load i64, ptr %[[TEMP]], align 8
+    // CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[TEMP]])
     // CHECK: ret i64 %[[R]]
     transmute(x)
 }