about summary refs log tree commit diff
diff options
context:
space:
mode:
authorStuart Cook <Zalathar@users.noreply.github.com>2025-09-12 20:02:10 +1000
committerGitHub <noreply@github.com>2025-09-12 20:02:10 +1000
commit48d684111e462b1ff600728b9a816520a4f3276a (patch)
treec76b3154a008882915152eb791c2d3fd127d4d56
parent8e2ed71effd5f81bff319c0c7adaca42084e2a71 (diff)
parent94fbb2178ae5960894aaae199233b0d3ac020d79 (diff)
downloadrust-48d684111e462b1ff600728b9a816520a4f3276a.tar.gz
rust-48d684111e462b1ff600728b9a816520a4f3276a.zip
Rollup merge of #144549 - folkertdev:va-arg-arm, r=saethlin
match clang's `va_arg` assembly on arm targets

tracking issue: https://github.com/rust-lang/rust/issues/44930

For this example

```rust
#![feature(c_variadic)]

#[unsafe(no_mangle)]
unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 {
    let b = args.arg::<f64>();
    let c = args.arg::<f64>();

    a + b + c
}
```

We currently generate (via llvm):

```asm
variadic:
    sub     sp, sp, #12
    stmib   sp, {r2, r3}
    vmov    d0, r0, r1
    add     r0, sp, #4
    vldr    d1, [sp, #4]
    add     r0, r0, #15
    bic     r0, r0, #7
    vadd.f64        d0, d0, d1
    add     r1, r0, #8
    str     r1, [sp]
    vldr    d1, [r0]
    vadd.f64        d0, d0, d1
    vmov    r0, r1, d0
    add     sp, sp, #12
    bx      lr
```

LLVM is not doing a good job. In fact, it's well-known that LLVM's implementation of `va_arg` is kind of bad, and we implement it ourselves (based on clang) for many targets already. For arm,  our own `emit_ptr_va_arg` saves 3 instructions.

Next, it turns out it's important for LLVM to explicitly start and end the lifetime of the `va_list`. In https://github.com/rust-lang/rust/pull/146059 I already end the lifetime, but when looking at this again, I noticed that it is important to also start it, see https://godbolt.org/z/EGqvKTTsK: failing to explicitly start the lifetime uses an extra register.

So, the combination of `emit_ptr_va_arg` with starting/ending the lifetime makes rustc emit exactly the instructions that clang generates::

```asm
variadic:
    sub     sp, sp, #12
    stmib   sp, {r2, r3}
    vmov    d16, r0, r1
    vldr    d17, [sp, #4]
    vadd.f64        d16, d16, d17
    vldr    d17, [sp, #12]
    vadd.f64        d16, d16, d17
    vmov    r0, r1, d16
    add     sp, sp, #12
    bx      lr
```

The arguments to `emit_ptr_va_arg` are based on [the clang implementation](https://github.com/llvm/llvm-project/blob/03dc2a41f3d9a500e47b513de5c5008c06860d65/clang/lib/CodeGen/Targets/ARM.cpp#L798-L844).

r? ``@workingjubilee`` (I can re-roll if your queue is too full, but you do seem like the right person here)

try-job: armhf-gnu
-rw-r--r--compiler/rustc_codegen_llvm/src/va_arg.rs15
-rw-r--r--compiler/rustc_codegen_ssa/src/mir/block.rs2
-rw-r--r--compiler/rustc_codegen_ssa/src/mir/mod.rs4
-rw-r--r--tests/assembly-llvm/c-variadic-arm.rs26
-rw-r--r--tests/codegen-llvm/c-variadic-lifetime.rs21
5 files changed, 67 insertions, 1 deletions
diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
index 7eb5d302058..ab08125217f 100644
--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -908,6 +908,21 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
             )
         }
         "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),
+        "arm" => {
+            // Types wider than 16 bytes are not currently supported. Clang has special logic for
+            // such types, but `VaArgSafe` is not implemented for any type that is this large.
+            assert!(bx.cx.size_of(target_ty).bytes() <= 16);
+
+            emit_ptr_va_arg(
+                bx,
+                addr,
+                target_ty,
+                PassMode::Direct,
+                SlotSize::Bytes4,
+                AllowHigherAlign::Yes,
+                ForceRightAdjust::No,
+            )
+        }
         "s390x" => emit_s390x_va_arg(bx, addr, target_ty),
         "powerpc" => emit_powerpc_va_arg(bx, addr, target_ty),
         "powerpc64" | "powerpc64le" => emit_ptr_va_arg(
diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs
index 5f6976f5d00..6492ef73956 100644
--- a/compiler/rustc_codegen_ssa/src/mir/block.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/block.rs
@@ -520,7 +520,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 LocalRef::Place(va_list) => {
                     bx.va_end(va_list.val.llval);
 
-                    // Explicitly end the lifetime of the `va_list`, this matters for LLVM.
+                    // Explicitly end the lifetime of the `va_list`, improves LLVM codegen.
                     bx.lifetime_end(va_list.val.llval, va_list.layout.size);
                 }
                 _ => bug!("C-variadic function must have a `VaList` place"),
diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs
index 06873313e2e..6b109e8b8e2 100644
--- a/compiler/rustc_codegen_ssa/src/mir/mod.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs
@@ -438,6 +438,10 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
 
             if fx.fn_abi.c_variadic && arg_index == fx.fn_abi.args.len() {
                 let va_list = PlaceRef::alloca(bx, bx.layout_of(arg_ty));
+
+                // Explicitly start the lifetime of the `va_list`, improves LLVM codegen.
+                bx.lifetime_start(va_list.val.llval, va_list.layout.size);
+
                 bx.va_start(va_list.val.llval);
 
                 return LocalRef::Place(va_list);
diff --git a/tests/assembly-llvm/c-variadic-arm.rs b/tests/assembly-llvm/c-variadic-arm.rs
new file mode 100644
index 00000000000..2ef307405e1
--- /dev/null
+++ b/tests/assembly-llvm/c-variadic-arm.rs
@@ -0,0 +1,26 @@
+//@ assembly-output: emit-asm
+//@ compile-flags: -Copt-level=3
+//@ only-arm
+//@ ignore-thumb
+//@ ignore-android
+#![no_std]
+#![crate_type = "lib"]
+#![feature(c_variadic)]
+
+// Check that the assembly that rustc generates matches what clang emits.
+
+#[unsafe(no_mangle)]
+unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 {
+    // CHECK-LABEL: variadic
+    // CHECK: sub sp, sp
+
+    // CHECK: vldr
+    // CHECK: vadd.f64
+    // CHECK: vldr
+    // CHECK: vadd.f64
+    let b = args.arg::<f64>();
+    let c = args.arg::<f64>();
+    a + b + c
+
+    // CHECK: add sp, sp
+}
diff --git a/tests/codegen-llvm/c-variadic-lifetime.rs b/tests/codegen-llvm/c-variadic-lifetime.rs
new file mode 100644
index 00000000000..5b2f8af18c8
--- /dev/null
+++ b/tests/codegen-llvm/c-variadic-lifetime.rs
@@ -0,0 +1,21 @@
+//@ add-core-stubs
+//@ compile-flags: -Copt-level=3
+#![feature(c_variadic)]
+#![crate_type = "lib"]
+
+// Check that `%args` explicitly has its lifetime start and end. Being explicit can improve
+// instruction and register selection, see e.g. https://github.com/rust-lang/rust/pull/144549
+
+#[unsafe(no_mangle)]
+unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 {
+    // CHECK: call void @llvm.lifetime.start.p0(i64 {{[0-9]+}}, ptr nonnull %args)
+    // CHECK: call void @llvm.va_start.p0(ptr nonnull %args)
+
+    let b = args.arg::<f64>();
+    let c = args.arg::<f64>();
+
+    a + b + c
+
+    // CHECK: call void @llvm.va_end.p0(ptr nonnull %args)
+    // CHECK: call void @llvm.lifetime.end.p0(i64 {{[0-9]+}}, ptr nonnull %args)
+}