about summary refs log tree commit diff
diff options
context:
space:
mode:
authorRalf Jung <post@ralfj.de>2025-02-24 13:18:37 +0000
committerGitHub <noreply@github.com>2025-02-24 13:18:37 +0000
commit913612c59e35e6c85182c55a5f7bface4a828a32 (patch)
treeffbdebaa80d91ea9e5f03005b9260b6f651d5ad9
parenta3dd76430735bfa192697c04c1a6be232948fff5 (diff)
parent4303a14bdd8388e5934c0d823c54b8f935b2e1b3 (diff)
downloadrust-913612c59e35e6c85182c55a5f7bface4a828a32.tar.gz
rust-913612c59e35e6c85182c55a5f7bface4a828a32.zip
Merge pull request #4193 from bjorn3/arm64_vpmaxq_u8
Implement vpmaxq_u8 on aarch64
-rw-r--r--src/tools/miri/src/shims/aarch64.rs78
-rw-r--r--src/tools/miri/src/shims/foreign_items.rs18
-rw-r--r--src/tools/miri/src/shims/mod.rs1
-rw-r--r--src/tools/miri/tests/pass/shims/aarch64/intrinsics-aarch64-neon.rs40
4 files changed, 124 insertions, 13 deletions
diff --git a/src/tools/miri/src/shims/aarch64.rs b/src/tools/miri/src/shims/aarch64.rs
new file mode 100644
index 00000000000..7cccc9e51d8
--- /dev/null
+++ b/src/tools/miri/src/shims/aarch64.rs
@@ -0,0 +1,78 @@
+use rustc_middle::mir::BinOp;
+use rustc_middle::ty::Ty;
+use rustc_span::Symbol;
+use rustc_target::callconv::{Conv, FnAbi};
+
+use crate::*;
+
+impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
+pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
+    fn emulate_aarch64_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: &FnAbi<'tcx, Ty<'tcx>>,
+        args: &[OpTy<'tcx>],
+        dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx, EmulateItemResult> {
+        let this = self.eval_context_mut();
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.aarch64.").unwrap();
+        match unprefixed_name {
+            "isb" => {
+                let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
+                let arg = this.read_scalar(arg)?.to_i32()?;
+                match arg {
+                    // SY ("full system scope")
+                    15 => {
+                        this.yield_active_thread();
+                    }
+                    _ => {
+                        throw_unsup_format!("unsupported llvm.aarch64.isb argument {}", arg);
+                    }
+                }
+            }
+
+            // Used to implement the vpmaxq_u8 function.
+            // Computes the maximum of adjacent pairs; the first half of the output is produced from the
+            // `left` input, the second half of the output from the `right` input.
+            // https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8
+            "neon.umaxp.v16i8" => {
+                let [left, right] = this.check_shim(abi, Conv::C, link_name, args)?;
+
+                let (left, left_len) = this.project_to_simd(left)?;
+                let (right, right_len) = this.project_to_simd(right)?;
+                let (dest, lane_count) = this.project_to_simd(dest)?;
+                assert_eq!(left_len, right_len);
+                assert_eq!(lane_count, left_len);
+
+                for lane_idx in 0..lane_count {
+                    let src = if lane_idx < (lane_count / 2) { &left } else { &right };
+                    let src_idx = lane_idx.strict_rem(lane_count / 2);
+
+                    let lhs_lane =
+                        this.read_immediate(&this.project_index(src, src_idx.strict_mul(2))?)?;
+                    let rhs_lane = this.read_immediate(
+                        &this.project_index(src, src_idx.strict_mul(2).strict_add(1))?,
+                    )?;
+
+                    // Compute `if lhs > rhs { lhs } else { rhs }`, i.e., `max`.
+                    let res_lane = if this
+                        .binary_op(BinOp::Gt, &lhs_lane, &rhs_lane)?
+                        .to_scalar()
+                        .to_bool()?
+                    {
+                        lhs_lane
+                    } else {
+                        rhs_lane
+                    };
+
+                    let dest = this.project_index(&dest, lane_idx)?;
+                    this.write_immediate(*res_lane, &dest)?;
+                }
+            }
+
+            _ => return interp_ok(EmulateItemResult::NotSupported),
+        }
+        interp_ok(EmulateItemResult::NeedsReturn)
+    }
+}
diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs
index ec8f6663822..ee755e4c182 100644
--- a/src/tools/miri/src/shims/foreign_items.rs
+++ b/src/tools/miri/src/shims/foreign_items.rs
@@ -981,20 +981,12 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     this, link_name, abi, args, dest,
                 );
             }
-            // FIXME: Move these to an `arm` submodule.
-            "llvm.aarch64.isb" if this.tcx.sess.target.arch == "aarch64" => {
-                let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
-                let arg = this.read_scalar(arg)?.to_i32()?;
-                match arg {
-                    // SY ("full system scope")
-                    15 => {
-                        this.yield_active_thread();
-                    }
-                    _ => {
-                        throw_unsup_format!("unsupported llvm.aarch64.isb argument {}", arg);
-                    }
-                }
+            name if name.starts_with("llvm.aarch64.") && this.tcx.sess.target.arch == "aarch64" => {
+                return shims::aarch64::EvalContextExt::emulate_aarch64_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
             }
+            // FIXME: Move this to an `arm` submodule.
             "llvm.arm.hint" if this.tcx.sess.target.arch == "arm" => {
                 let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
                 let arg = this.read_scalar(arg)?.to_i32()?;
diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs
index 61681edcf76..b498551ace3 100644
--- a/src/tools/miri/src/shims/mod.rs
+++ b/src/tools/miri/src/shims/mod.rs
@@ -1,5 +1,6 @@
 #![warn(clippy::arithmetic_side_effects)]
 
+mod aarch64;
 mod alloc;
 mod backtrace;
 mod files;
diff --git a/src/tools/miri/tests/pass/shims/aarch64/intrinsics-aarch64-neon.rs b/src/tools/miri/tests/pass/shims/aarch64/intrinsics-aarch64-neon.rs
new file mode 100644
index 00000000000..84485dbad8c
--- /dev/null
+++ b/src/tools/miri/tests/pass/shims/aarch64/intrinsics-aarch64-neon.rs
@@ -0,0 +1,40 @@
+// We're testing aarch64 target specific features
+//@only-target: aarch64
+//@compile-flags: -C target-feature=+neon
+
+use std::arch::aarch64::*;
+use std::arch::is_aarch64_feature_detected;
+
+fn main() {
+    assert!(is_aarch64_feature_detected!("neon"));
+
+    unsafe {
+        test_neon();
+    }
+}
+
+#[target_feature(enable = "neon")]
+unsafe fn test_neon() {
+    // Adapted from library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+    unsafe fn test_vpmaxq_u8() {
+        let a = vld1q_u8([1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8].as_ptr());
+        let b = vld1q_u8([0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9].as_ptr());
+        let e = [2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9];
+        let mut r = [0; 16];
+        vst1q_u8(r.as_mut_ptr(), vpmaxq_u8(a, b));
+        assert_eq!(r, e);
+    }
+    test_vpmaxq_u8();
+
+    unsafe fn test_vpmaxq_u8_is_unsigned() {
+        let a = vld1q_u8(
+            [255, 0, 253, 252, 251, 250, 249, 248, 255, 254, 253, 252, 251, 250, 249, 248].as_ptr(),
+        );
+        let b = vld1q_u8([254, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9].as_ptr());
+        let e = [255, 253, 251, 249, 255, 253, 251, 249, 254, 5, 7, 9, 3, 5, 7, 9];
+        let mut r = [0; 16];
+        vst1q_u8(r.as_mut_ptr(), vpmaxq_u8(a, b));
+        assert_eq!(r, e);
+    }
+    test_vpmaxq_u8_is_unsigned();
+}