author     Erik Desjardins <erikdesjardins@users.noreply.github.com>   2024-03-08 17:11:27 -0500
committer  Erik Desjardins <erikdesjardins@users.noreply.github.com>   2024-03-11 09:38:54 -0400
commit     207fe38630fdcfecea048aa41c6a9dd47c19573f (patch)
tree       52311de9cdeb4642f8bc217657654f128a07c511
parent     e919669d42dfb8950866d4cb268c5359eb3f7c54 (diff)
copy byval argument to alloca if alignment is insufficient
-rw-r--r--  compiler/rustc_codegen_llvm/src/abi.rs           104
-rw-r--r--  compiler/rustc_codegen_ssa/src/mir/mod.rs         62
-rw-r--r--  tests/codegen/align-byval-alignment-mismatch.rs  126
3 files changed, 220 insertions(+), 72 deletions(-)
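
What this fixes, in a hedged standalone sketch (illustrative names, mirroring the codegen test added below rather than quoting it): on i686-unknown-linux-gnu an `align(16)` struct reaches an `extern "C"` callee as a `byval` pointer that only guarantees align 4, so code that relied on the full Rust alignment, such as taking a reference, could see an underaligned value; after this commit the callee first copies the argument into a 16-aligned alloca.

// Illustrative sketch only (assumed example, not part of the commit).
#[repr(C, align(16))]
pub struct Align16 {
    x: u128,
    y: u128,
    z: u128,
}

pub extern "C" fn callee(x: Align16) {
    // Taking a reference requires the full Rust alignment (16). With this commit,
    // `x` is copied into a fresh 16-aligned alloca in the callee, so this address
    // is always 16-aligned even though the incoming byval pointer only had align 4.
    let p: *const Align16 = &x;
    debug_assert_eq!(p as usize % 16, 0);
}
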
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index 147939d3a52..e5f5146fac8 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -203,57 +203,63 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
         val: &'ll Value,
         dst: PlaceRef<'tcx, &'ll Value>,
     ) {
-        if self.is_ignore() {
-            return;
-        }
-        if self.is_sized_indirect() {
-            OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst)
-        } else if self.is_unsized_indirect() {
-            bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
-        } else if let PassMode::Cast { cast, pad_i32: _ } = &self.mode {
-            // FIXME(eddyb): Figure out when the simpler Store is safe, clang
-            // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
-            let can_store_through_cast_ptr = false;
-            if can_store_through_cast_ptr {
-                bx.store(val, dst.llval, self.layout.align.abi);
-            } else {
-                // The actual return type is a struct, but the ABI
-                // adaptation code has cast it into some scalar type. The
-                // code that follows is the only reliable way I have
-                // found to do a transform like i64 -> {i32,i32}.
-                // Basically we dump the data onto the stack then memcpy it.
-                //
-                // Other approaches I tried:
-                // - Casting rust ret pointer to the foreign type and using Store
-                //   is (a) unsafe if size of foreign type > size of rust type and
-                //   (b) runs afoul of strict aliasing rules, yielding invalid
-                //   assembly under -O (specifically, the store gets removed).
-                // - Truncating foreign type to correct integral type and then
-                //   bitcasting to the struct type yields invalid cast errors.
-
-                // We instead thus allocate some scratch space...
-                let scratch_size = cast.size(bx);
-                let scratch_align = cast.align(bx);
-                let llscratch = bx.alloca(cast.llvm_type(bx), scratch_align);
-                bx.lifetime_start(llscratch, scratch_size);
-
-                // ... where we first store the value...
-                bx.store(val, llscratch, scratch_align);
-
-                // ... and then memcpy it to the intended destination.
-                bx.memcpy(
-                    dst.llval,
-                    self.layout.align.abi,
-                    llscratch,
-                    scratch_align,
-                    bx.const_usize(self.layout.size.bytes()),
-                    MemFlags::empty(),
-                );
+        match &self.mode {
+            PassMode::Ignore => {}
+            // Sized indirect arguments
+            PassMode::Indirect { attrs, meta_attrs: None, on_stack: _ } => {
+                let align = attrs.pointee_align.unwrap_or(self.layout.align.abi);
+                OperandValue::Ref(val, None, align).store(bx, dst);
+            }
+            // Unsized indirect arguments
+            PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
+                bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
+            }
+            PassMode::Cast { cast, pad_i32: _ } => {
+                // FIXME(eddyb): Figure out when the simpler Store is safe, clang
+                // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
+                let can_store_through_cast_ptr = false;
+                if can_store_through_cast_ptr {
+                    bx.store(val, dst.llval, self.layout.align.abi);
+                } else {
+                    // The actual return type is a struct, but the ABI
+                    // adaptation code has cast it into some scalar type. The
+                    // code that follows is the only reliable way I have
+                    // found to do a transform like i64 -> {i32,i32}.
+                    // Basically we dump the data onto the stack then memcpy it.
+                    //
+                    // Other approaches I tried:
+                    // - Casting rust ret pointer to the foreign type and using Store
+                    //   is (a) unsafe if size of foreign type > size of rust type and
+                    //   (b) runs afoul of strict aliasing rules, yielding invalid
+                    //   assembly under -O (specifically, the store gets removed).
+                    // - Truncating foreign type to correct integral type and then
+                    //   bitcasting to the struct type yields invalid cast errors.
+
+                    // We instead thus allocate some scratch space...
+                    let scratch_size = cast.size(bx);
+                    let scratch_align = cast.align(bx);
+                    let llscratch = bx.alloca(cast.llvm_type(bx), scratch_align);
+                    bx.lifetime_start(llscratch, scratch_size);
+
+                    // ... where we first store the value...
+                    bx.store(val, llscratch, scratch_align);
+
+                    // ... and then memcpy it to the intended destination.
+                    bx.memcpy(
+                        dst.llval,
+                        self.layout.align.abi,
+                        llscratch,
+                        scratch_align,
+                        bx.const_usize(self.layout.size.bytes()),
+                        MemFlags::empty(),
+                    );
 
-                bx.lifetime_end(llscratch, scratch_size);
+                    bx.lifetime_end(llscratch, scratch_size);
+                }
+            }
+            _ => {
+                OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
             }
-        } else {
-            OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
         }
     }
 
diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs
index a6fcf1fd38c..2ae51d18c7e 100644
--- a/compiler/rustc_codegen_ssa/src/mir/mod.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs
@@ -367,29 +367,45 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
                 }
             }
 
-            if arg.is_sized_indirect() {
-                // Don't copy an indirect argument to an alloca, the caller
-                // already put it in a temporary alloca and gave it up.
-                // FIXME: lifetimes
-                let llarg = bx.get_param(llarg_idx);
-                llarg_idx += 1;
-                LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
-            } else if arg.is_unsized_indirect() {
-                // As the storage for the indirect argument lives during
-                // the whole function call, we just copy the fat pointer.
-                let llarg = bx.get_param(llarg_idx);
-                llarg_idx += 1;
-                let llextra = bx.get_param(llarg_idx);
-                llarg_idx += 1;
-                let indirect_operand = OperandValue::Pair(llarg, llextra);
-
-                let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout);
-                indirect_operand.store(bx, tmp);
-                LocalRef::UnsizedPlace(tmp)
-            } else {
-                let tmp = PlaceRef::alloca(bx, arg.layout);
-                bx.store_fn_arg(arg, &mut llarg_idx, tmp);
-                LocalRef::Place(tmp)
+            match arg.mode {
+                // Sized indirect arguments
+                PassMode::Indirect { attrs, meta_attrs: None, on_stack: _ } => {
+                    // Don't copy an indirect argument to an alloca, the caller already put it
+                    // in a temporary alloca and gave it up.
+                    // FIXME: lifetimes
+                    if let Some(pointee_align) = attrs.pointee_align
+                        && pointee_align < arg.layout.align.abi
+                    {
+                        // ...unless the argument is underaligned, then we need to copy it to
+                        // a higher-aligned alloca.
+                        let tmp = PlaceRef::alloca(bx, arg.layout);
+                        bx.store_fn_arg(arg, &mut llarg_idx, tmp);
+                        LocalRef::Place(tmp)
+                    } else {
+                        let llarg = bx.get_param(llarg_idx);
+                        llarg_idx += 1;
+                        LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
+                    }
+                }
+                // Unsized indirect arguments
+                PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
+                    // As the storage for the indirect argument lives during
+                    // the whole function call, we just copy the fat pointer.
+                    let llarg = bx.get_param(llarg_idx);
+                    llarg_idx += 1;
+                    let llextra = bx.get_param(llarg_idx);
+                    llarg_idx += 1;
+                    let indirect_operand = OperandValue::Pair(llarg, llextra);
+
+                    let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout);
+                    indirect_operand.store(bx, tmp);
+                    LocalRef::UnsizedPlace(tmp)
+                }
+                _ => {
+                    let tmp = PlaceRef::alloca(bx, arg.layout);
+                    bx.store_fn_arg(arg, &mut llarg_idx, tmp);
+                    LocalRef::Place(tmp)
+                }
             }
         })
         .collect::<Vec<_>>();
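
The new match arm in `arg_local_refs` boils down to one comparison: a sized indirect (byval) argument is reused in place only when the incoming pointer's guaranteed alignment (`ArgAttributes::pointee_align`) is at least the Rust type's ABI alignment; otherwise it is copied into a fresh, properly aligned alloca. A minimal standalone sketch of that decision, with assumed names (`Align`, `ArgPlacement`, `place_sized_indirect` are not compiler APIs):

// Minimal decision sketch with assumed names; the real logic lives in
// `arg_local_refs` above.
#[derive(Clone, Copy, PartialEq, PartialOrd)]
struct Align(u64); // alignment in bytes

enum ArgPlacement {
    // Reuse the caller's byval temporary directly (no copy).
    UseCallerAlloca,
    // Copy into a new alloca carrying the Rust type's alignment.
    CopyToAlignedAlloca(Align),
}

fn place_sized_indirect(pointee_align: Option<Align>, rust_abi_align: Align) -> ArgPlacement {
    match pointee_align {
        // The byval pointer is underaligned for the Rust type: copy it.
        Some(a) if a < rust_abi_align => ArgPlacement::CopyToAlignedAlloca(rust_abi_align),
        // Otherwise the caller's temporary is already aligned well enough.
        _ => ArgPlacement::UseCallerAlloca,
    }
}
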
diff --git a/tests/codegen/align-byval-alignment-mismatch.rs b/tests/codegen/align-byval-alignment-mismatch.rs
new file mode 100644
index 00000000000..306e3ce1358
--- /dev/null
+++ b/tests/codegen/align-byval-alignment-mismatch.rs
@@ -0,0 +1,126 @@
+// ignore-tidy-linelength
+//@ revisions:i686-linux x86_64-linux
+
+//@[i686-linux] compile-flags: --target i686-unknown-linux-gnu
+//@[i686-linux] needs-llvm-components: x86
+//@[x86_64-linux] compile-flags: --target x86_64-unknown-linux-gnu
+//@[x86_64-linux] needs-llvm-components: x86
+
+// Tests that we correctly copy arguments into allocas when the alignment of the byval argument
+// is different from the alignment of the Rust type.
+
+// For the following test cases:
+// All of the `*_decreases_alignment` functions should codegen to a direct call, since the
+// alignment is already sufficient.
+// All of the `*_increases_alignment` functions should copy the argument to an alloca
+// on i686-unknown-linux-gnu, since the alignment needs to be increased, and should codegen
+// to a direct call on x86_64-unknown-linux-gnu, where byval alignment matches Rust alignment.
+
+#![feature(no_core, lang_items)]
+#![crate_type = "lib"]
+#![no_std]
+#![no_core]
+#![allow(non_camel_case_types)]
+
+#[lang = "sized"]
+trait Sized {}
+#[lang = "freeze"]
+trait Freeze {}
+#[lang = "copy"]
+trait Copy {}
+
+// This type has align 1 in Rust, but as a byval argument on i686-linux, it will have align 4.
+#[repr(C)]
+#[repr(packed)]
+struct Align1 {
+    x: u128,
+    y: u128,
+    z: u128,
+}
+
+// This type has align 16 in Rust, but as a byval argument on i686-linux, it will have align 4.
+#[repr(C)]
+#[repr(align(16))]
+struct Align16 {
+    x: u128,
+    y: u128,
+    z: u128,
+}
+
+extern "C" {
+    fn extern_c_align1(x: Align1);
+    fn extern_c_align16(x: Align16);
+}
+
+// CHECK-LABEL: @rust_to_c_increases_alignment
+#[no_mangle]
+pub unsafe fn rust_to_c_increases_alignment(x: Align1) {
+    // i686-linux: start:
+    // i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align1, align 4
+    // i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 4 {{.*}}[[ALLOCA]], ptr {{.*}}align 1 {{.*}}%x
+    // i686-linux-NEXT: call void @extern_c_align1({{.+}} [[ALLOCA]])
+
+    // x86_64-linux: start:
+    // x86_64-linux-NEXT: call void @extern_c_align1
+    extern_c_align1(x);
+}
+
+// CHECK-LABEL: @rust_to_c_decreases_alignment
+#[no_mangle]
+pub unsafe fn rust_to_c_decreases_alignment(x: Align16) {
+    // CHECK: start:
+    // CHECK-NEXT: call void @extern_c_align16
+    extern_c_align16(x);
+}
+
+extern "Rust" {
+    fn extern_rust_align1(x: Align1);
+    fn extern_rust_align16(x: Align16);
+}
+
+// CHECK-LABEL: @c_to_rust_decreases_alignment
+#[no_mangle]
+pub unsafe extern "C" fn c_to_rust_decreases_alignment(x: Align1) {
+    // CHECK: start:
+    // CHECK-NEXT: call void @extern_rust_align1
+    extern_rust_align1(x);
+}
+
+// CHECK-LABEL: @c_to_rust_increases_alignment
+#[no_mangle]
+pub unsafe extern "C" fn c_to_rust_increases_alignment(x: Align16) {
+    // i686-linux: start:
+    // i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align16, align 16
+    // i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 16 {{.*}}[[ALLOCA]], ptr {{.*}}align 4 {{.*}}%0
+    // i686-linux-NEXT: call void @extern_rust_align16({{.+}} [[ALLOCA]])
+
+    // x86_64-linux: start:
+    // x86_64-linux-NEXT: call void @extern_rust_align16
+    extern_rust_align16(x);
+}
+
+extern "Rust" {
+    fn extern_rust_ref_align1(x: &Align1);
+    fn extern_rust_ref_align16(x: &Align16);
+}
+
+// CHECK-LABEL: @c_to_rust_ref_decreases_alignment
+#[no_mangle]
+pub unsafe extern "C" fn c_to_rust_ref_decreases_alignment(x: Align1) {
+    // CHECK: start:
+    // CHECK-NEXT: call void @extern_rust_ref_align1
+    extern_rust_ref_align1(&x);
+}
+
+// CHECK-LABEL: @c_to_rust_ref_increases_alignment
+#[no_mangle]
+pub unsafe extern "C" fn c_to_rust_ref_increases_alignment(x: Align16) {
+    // i686-linux: start:
+    // i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align16, align 16
+    // i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 16 {{.*}}[[ALLOCA]], ptr {{.*}}align 4 {{.*}}%0
+    // i686-linux-NEXT: call void @extern_rust_ref_align16({{.+}} [[ALLOCA]])
+
+    // x86_64-linux: start:
+    // x86_64-linux-NEXT: call void @extern_rust_ref_align16
+    extern_rust_ref_align16(&x);
+}