about summary refs log tree commit diff
diff options
context:
space:
mode:
author: beetrees <b@beetr.ee>, 2024-08-25 00:13:25 +0100
committer: beetrees <b@beetr.ee>, 2024-08-25 00:13:25 +0100
commit: abd44fc5f4211a0684da4b37a15c3d07b17a436e (patch)
tree: b178fc869b8c53ad7d9bf0229999f0c1652f50d2
parent: ec67cdf98ac081e1392a324ec47a0543743f697e (diff)
download: rust-abd44fc5f4211a0684da4b37a15c3d07b17a436e.tar.gz
          rust-abd44fc5f4211a0684da4b37a15c3d07b17a436e.zip
Add `f16` and `f128` inline ASM support for `aarch64`
-rw-r--r--  compiler/rustc_codegen_llvm/src/asm.rs     14
-rw-r--r--  compiler/rustc_target/src/asm/aarch64.rs    6
-rw-r--r--  tests/assembly/asm/aarch64-types.rs       133
-rw-r--r--  tests/ui/asm/aarch64/type-check-3.stderr    2
-rw-r--r--  tests/ui/asm/aarch64/type-f16.rs           14
5 files changed, 130 insertions, 39 deletions
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index f931698c38f..1d91c3fb17d 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -913,8 +913,10 @@ fn llvm_asm_scalar_type<'ll>(cx: &CodegenCx<'ll, '_>, scalar: Scalar) -> &'ll Ty
         Primitive::Int(Integer::I16, _) => cx.type_i16(),
         Primitive::Int(Integer::I32, _) => cx.type_i32(),
         Primitive::Int(Integer::I64, _) => cx.type_i64(),
+        Primitive::Float(Float::F16) => cx.type_f16(),
         Primitive::Float(Float::F32) => cx.type_f32(),
         Primitive::Float(Float::F64) => cx.type_f64(),
+        Primitive::Float(Float::F128) => cx.type_f128(),
         // FIXME(erikdesjardins): handle non-default addrspace ptr sizes
         Primitive::Pointer(_) => cx.type_from_integer(dl.ptr_sized_integer()),
         _ => unreachable!(),
@@ -948,7 +950,9 @@ fn llvm_fixup_input<'ll, 'tcx>(
                 value
             }
         }
-        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
+        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
+            if s.primitive() != Primitive::Float(Float::F128) =>
+        {
             let elem_ty = llvm_asm_scalar_type(bx.cx, s);
             let count = 16 / layout.size.bytes();
             let vec_ty = bx.cx.type_vector(elem_ty, count);
@@ -1090,7 +1094,9 @@ fn llvm_fixup_output<'ll, 'tcx>(
                 value
             }
         }
-        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
+        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
+            if s.primitive() != Primitive::Float(Float::F128) =>
+        {
             value = bx.extract_element(value, bx.const_i32(0));
             if let Primitive::Pointer(_) = s.primitive() {
                 value = bx.inttoptr(value, layout.llvm_type(bx.cx));
@@ -1222,7 +1228,9 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
                 layout.llvm_type(cx)
             }
         }
-        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
+        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
+            if s.primitive() != Primitive::Float(Float::F128) =>
+        {
             let elem_ty = llvm_asm_scalar_type(cx, s);
             let count = 16 / layout.size.bytes();
             cx.type_vector(elem_ty, count)
diff --git a/compiler/rustc_target/src/asm/aarch64.rs b/compiler/rustc_target/src/asm/aarch64.rs
index 041582b7df9..daf5162e8ac 100644
--- a/compiler/rustc_target/src/asm/aarch64.rs
+++ b/compiler/rustc_target/src/asm/aarch64.rs
@@ -61,9 +61,9 @@ impl AArch64InlineAsmRegClass {
         match self {
             Self::reg => types! { _: I8, I16, I32, I64, F16, F32, F64; },
             Self::vreg | Self::vreg_low16 => types! {
-                neon: I8, I16, I32, I64, F16, F32, F64,
-                    VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2), VecF64(1),
-                    VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(4),VecF16(8), VecF32(4), VecF64(2);
+                neon: I8, I16, I32, I64, F16, F32, F64, F128,
+                    VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2), VecF64(1),
+                    VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
             },
             Self::preg => &[],
         }
diff --git a/tests/assembly/asm/aarch64-types.rs b/tests/assembly/asm/aarch64-types.rs
index 4b506f180d8..f36345670e3 100644
--- a/tests/assembly/asm/aarch64-types.rs
+++ b/tests/assembly/asm/aarch64-types.rs
@@ -5,10 +5,12 @@
 //@ [arm64ec] compile-flags: --target arm64ec-pc-windows-msvc
 //@ [arm64ec] needs-llvm-components: aarch64
 
-#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16)]
+#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16, f128)]
 #![crate_type = "rlib"]
 #![no_core]
 #![allow(asm_sub_register, non_camel_case_types)]
+// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
+#![feature(auto_traits)]
 
 #[rustc_builtin_macro]
 macro_rules! asm {
@@ -41,8 +43,6 @@ pub struct i64x1(i64);
 #[repr(simd)]
 pub struct f16x4(f16, f16, f16, f16);
 #[repr(simd)]
-pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
-#[repr(simd)]
 pub struct f32x2(f32, f32);
 #[repr(simd)]
 pub struct f64x1(f64);
@@ -55,6 +55,8 @@ pub struct i32x4(i32, i32, i32, i32);
 #[repr(simd)]
 pub struct i64x2(i64, i64);
 #[repr(simd)]
+pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
+#[repr(simd)]
 pub struct f32x4(f32, f32, f32, f32);
 #[repr(simd)]
 pub struct f64x2(f64, f64);
@@ -66,13 +68,14 @@ impl Copy for i32 {}
 impl Copy for f32 {}
 impl Copy for i64 {}
 impl Copy for f64 {}
+impl Copy for f128 {}
 impl Copy for ptr {}
 impl Copy for i8x8 {}
 impl Copy for i16x4 {}
 impl Copy for i32x2 {}
 impl Copy for i64x1 {}
-impl Copy for f32x2 {}
 impl Copy for f16x4 {}
+impl Copy for f32x2 {}
 impl Copy for f64x1 {}
 impl Copy for i8x16 {}
 impl Copy for i16x8 {}
@@ -82,6 +85,12 @@ impl Copy for f16x8 {}
 impl Copy for f32x4 {}
 impl Copy for f64x2 {}
 
+// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
+#[lang = "freeze"]
+unsafe auto trait Freeze {}
+#[lang = "unpin"]
+auto trait Unpin {}
+
 extern "C" {
     fn extern_func();
     static extern_static: u8;
@@ -118,38 +127,44 @@ pub unsafe fn issue_75761() {
 
 macro_rules! check {
     ($func:ident $ty:ident $class:ident $mov:literal $modifier:literal) => {
+        // FIXME(f16_f128): Change back to `$func(x: $ty) -> $ty` once arm64ec can pass and return
+        // `f16` and `f128` without LLVM erroring.
+        // LLVM issue: <https://github.com/llvm/llvm-project/issues/94434>
         #[no_mangle]
-        pub unsafe fn $func(x: $ty) -> $ty {
+        pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
             // Hack to avoid function merging
             extern "Rust" {
                 fn dont_merge(s: &str);
             }
             dont_merge(stringify!($func));
 
+            let x = *inp;
             let y;
             asm!(
                 concat!($mov, " {:", $modifier, "}, {:", $modifier, "}"),
                 out($class) y,
                 in($class) x
             );
-            y
+            *out = y;
         }
     };
 }
 
 macro_rules! check_reg {
     ($func:ident $ty:ident $reg:tt $mov:literal) => {
+        // FIXME(f16_f128): See FIXME in `check!`
         #[no_mangle]
-        pub unsafe fn $func(x: $ty) -> $ty {
+        pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
             // Hack to avoid function merging
             extern "Rust" {
                 fn dont_merge(s: &str);
             }
             dont_merge(stringify!($func));
 
+            let x = *inp;
             let y;
             asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x);
-            y
+            *out = y;
         }
     };
 }
@@ -166,18 +181,18 @@ check!(reg_i8 i8 reg "mov" "");
 // CHECK: //NO_APP
 check!(reg_i16 i16 reg "mov" "");
 
+// CHECK-LABEL: {{("#)?}}reg_f16{{"?}}
+// CHECK: //APP
+// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
+// CHECK: //NO_APP
+check!(reg_f16 f16 reg "mov" "");
+
 // CHECK-LABEL: {{("#)?}}reg_i32{{"?}}
 // CHECK: //APP
 // CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
 // CHECK: //NO_APP
 check!(reg_i32 i32 reg "mov" "");
 
-// CHECK-LABEL: reg_f16:
-// CHECK: @APP
-// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
-// CHECK: @NO_APP
-check!(reg_f16 f16 reg "mov");
-
 // CHECK-LABEL: {{("#)?}}reg_f32{{"?}}
 // CHECK: //APP
 // CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
@@ -214,6 +229,12 @@ check!(vreg_i8 i8 vreg "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_i16 i16 vreg "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_f16{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_f16 f16 vreg "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_i32{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -238,6 +259,12 @@ check!(vreg_i64 i64 vreg "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_f64 f64 vreg "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_f128{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_f128 f128 vreg "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_ptr{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -268,19 +295,11 @@ check!(vreg_i32x2 i32x2 vreg "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_i64x1 i64x1 vreg "fmov" "s");
 
-// neon-LABEL: vreg_f16x4:
-// neon: @APP
-// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// neon: @NO_APP
-#[cfg(neon)]
-check!(vreg_f16x4 f16x4 vreg "vmov.f64");
-
-// neon-LABEL: vreg_f16x8:
-// neon: @APP
-// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// neon: @NO_APP
-#[cfg(neon)]
-check!(vreg_f16x8 f16x8 vreg "vmov");
+// CHECK-LABEL: {{("#)?}}vreg_f16x4{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_f16x4 f16x4 vreg "fmov" "s");
 
 // CHECK-LABEL: {{("#)?}}vreg_f32x2{{"?}}
 // CHECK: //APP
@@ -318,6 +337,12 @@ check!(vreg_i32x4 i32x4 vreg "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_i64x2 i64x2 vreg "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_f16x8{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_f16x8 f16x8 vreg "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_f32x4{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -342,6 +367,12 @@ check!(vreg_low16_i8 i8 vreg_low16 "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_low16_i16 i16 vreg_low16 "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_low16_f16{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_low16_f16 f16 vreg_low16 "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_low16_f32{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -360,6 +391,12 @@ check!(vreg_low16_i64 i64 vreg_low16 "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_low16_f64 f64 vreg_low16 "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_low16_f128{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_low16_f128 f128 vreg_low16 "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_low16_ptr{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -390,6 +427,12 @@ check!(vreg_low16_i32x2 i32x2 vreg_low16 "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_low16_i64x1 i64x1 vreg_low16 "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_low16_f16x4{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_low16_f16x4 f16x4 vreg_low16 "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_low16_f32x2{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -426,6 +469,12 @@ check!(vreg_low16_i32x4 i32x4 vreg_low16 "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_low16_i64x2 i64x2 vreg_low16 "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_low16_f16x8{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_low16_f16x8 f16x8 vreg_low16 "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_low16_f32x4{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -450,6 +499,12 @@ check_reg!(x0_i8 i8 "x0" "mov");
 // CHECK: //NO_APP
 check_reg!(x0_i16 i16 "x0" "mov");
 
+// CHECK-LABEL: {{("#)?}}x0_f16{{"?}}
+// CHECK: //APP
+// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
+// CHECK: //NO_APP
+check_reg!(x0_f16 f16 "x0" "mov");
+
 // CHECK-LABEL: {{("#)?}}x0_i32{{"?}}
 // CHECK: //APP
 // CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
@@ -492,6 +547,12 @@ check_reg!(v0_i8 i8 "s0" "fmov");
 // CHECK: //NO_APP
 check_reg!(v0_i16 i16 "s0" "fmov");
 
+// CHECK-LABEL: {{("#)?}}v0_f16{{"?}}
+// CHECK: //APP
+// CHECK: fmov s0, s0
+// CHECK: //NO_APP
+check_reg!(v0_f16 f16 "s0" "fmov");
+
 // CHECK-LABEL: {{("#)?}}v0_i32{{"?}}
 // CHECK: //APP
 // CHECK: fmov s0, s0
@@ -516,6 +577,12 @@ check_reg!(v0_i64 i64 "s0" "fmov");
 // CHECK: //NO_APP
 check_reg!(v0_f64 f64 "s0" "fmov");
 
+// CHECK-LABEL: {{("#)?}}v0_f128{{"?}}
+// CHECK: //APP
+// CHECK: fmov s0, s0
+// CHECK: //NO_APP
+check_reg!(v0_f128 f128 "s0" "fmov");
+
 // CHECK-LABEL: {{("#)?}}v0_ptr{{"?}}
 // CHECK: //APP
 // CHECK: fmov s0, s0
@@ -546,6 +613,12 @@ check_reg!(v0_i32x2 i32x2 "s0" "fmov");
 // CHECK: //NO_APP
 check_reg!(v0_i64x1 i64x1 "s0" "fmov");
 
+// CHECK-LABEL: {{("#)?}}v0_f16x4{{"?}}
+// CHECK: //APP
+// CHECK: fmov s0, s0
+// CHECK: //NO_APP
+check_reg!(v0_f16x4 f16x4 "s0" "fmov");
+
 // CHECK-LABEL: {{("#)?}}v0_f32x2{{"?}}
 // CHECK: //APP
 // CHECK: fmov s0, s0
@@ -582,6 +655,12 @@ check_reg!(v0_i32x4 i32x4 "s0" "fmov");
 // CHECK: //NO_APP
 check_reg!(v0_i64x2 i64x2 "s0" "fmov");
 
+// CHECK-LABEL: {{("#)?}}v0_f16x8{{"?}}
+// CHECK: //APP
+// CHECK: fmov s0, s0
+// CHECK: //NO_APP
+check_reg!(v0_f16x8 f16x8 "s0" "fmov");
+
 // CHECK-LABEL: {{("#)?}}v0_f32x4{{"?}}
 // CHECK: //APP
 // CHECK: fmov s0, s0
diff --git a/tests/ui/asm/aarch64/type-check-3.stderr b/tests/ui/asm/aarch64/type-check-3.stderr
index 4bd97b93867..9d84d2666b3 100644
--- a/tests/ui/asm/aarch64/type-check-3.stderr
+++ b/tests/ui/asm/aarch64/type-check-3.stderr
@@ -111,7 +111,7 @@ error: type `Simd256bit` cannot be used with this register class
 LL |         asm!("{}", in(vreg) f64x4);
    |                             ^^^^^
    |
-   = note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, i8x8, i16x4, i32x2, i64x1, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x4, f16x8, f32x4, f64x2
+   = note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, f128, i8x8, i16x4, i32x2, i64x1, f16x4, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2
 
 error: incompatible types for asm inout argument
   --> $DIR/type-check-3.rs:88:33
diff --git a/tests/ui/asm/aarch64/type-f16.rs b/tests/ui/asm/aarch64/type-f16.rs
index 763ea4684da..e62d8130c93 100644
--- a/tests/ui/asm/aarch64/type-f16.rs
+++ b/tests/ui/asm/aarch64/type-f16.rs
@@ -1,21 +1,25 @@
 //@ only-aarch64
 //@ run-pass
+//@ needs-asm-support
+
+#![feature(f16)]
 
-#![feature(f16, f128)]
 use std::arch::asm;
+
 #[inline(never)]
 pub fn f32_to_f16_asm(a: f32) -> f16 {
     let ret: f16;
     unsafe {
         asm!(
-        "fcvt    {ret:h}, {a:s}",
-        a = in(vreg) a,
-        ret = lateout(vreg) ret,
-        options(nomem, nostack),
+            "fcvt {ret:h}, {a:s}",
+            a = in(vreg) a,
+            ret = lateout(vreg) ret,
+            options(nomem, nostack),
         );
     }
     ret
 }
+
 fn main() {
     assert_eq!(f32_to_f16_asm(1.0 as f32), 1.0);
 }