about summary refs log tree commit diff
diff options
context:
space:
mode:
author bjorn3 <17426603+bjorn3@users.noreply.github.com> 2025-02-07 11:01:59 +0000
committer bjorn3 <17426603+bjorn3@users.noreply.github.com> 2025-02-07 11:01:59 +0000
commit b004312ee4c8418e5a42cc25b971fa5fc5ac88b7 (patch)
tree 72f09546187fc628bbcde6ee294eb19a85b514da
parent ed91b731796942a486a29b568e373a4276161863 (diff)
download rust-b004312ee4c8418e5a42cc25b971fa5fc5ac88b7.tar.gz
rust-b004312ee4c8418e5a42cc25b971fa5fc5ac88b7.zip
Implement arm64 vaddlvq_u8 and vld1q_u8_x4 vendor intrinsics
This is required for using the bytecount crate on arm64.
-rw-r--r-- src/intrinsics/llvm_aarch64.rs 24
1 files changed, 24 insertions, 0 deletions
diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs
index 39f6763d9f2..4c59c81296b 100644
--- a/src/intrinsics/llvm_aarch64.rs
+++ b/src/intrinsics/llvm_aarch64.rs
@@ -17,6 +17,14 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             fx.bcx.ins().fence();
         }
 
+        "llvm.aarch64.neon.ld1x4.v16i8.p0i8" => {
+            intrinsic_args!(fx, args => (ptr); intrinsic);
+
+            let ptr = ptr.load_scalar(fx);
+            let val = CPlace::for_ptr(Pointer::new(ptr), ret.layout()).to_cvalue(fx);
+            ret.write_cvalue(fx, val);
+        }
+
         _ if intrinsic.starts_with("llvm.aarch64.neon.abs.v") => {
             intrinsic_args!(fx, args => (a); intrinsic);
 
@@ -115,6 +123,22 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             );
         }
 
+        "llvm.aarch64.neon.uaddlv.i32.v16i8" => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            let mut res_val = fx.bcx.ins().iconst(types::I16, 0);
+            for lane_idx in 0..16 {
+                let lane = v.value_lane(fx, lane_idx).load_scalar(fx);
+                let lane = fx.bcx.ins().uextend(types::I16, lane);
+                res_val = fx.bcx.ins().iadd(res_val, lane);
+            }
+            let res = CValue::by_val(
+                fx.bcx.ins().uextend(types::I32, res_val),
+                fx.layout_of(fx.tcx.types.u32),
+            );
+            ret.write_cvalue(fx, res);
+        }
+
         _ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
             intrinsic_args!(fx, args => (v); intrinsic);