about summary refs log tree commit diff
path: root/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
diff options
context:
space:
mode:
authorJakub Okoński <jakub@okonski.org>2023-11-15 23:08:02 +0100
committerJakub Okoński <jakub@okonski.org>2023-12-09 12:36:08 +0100
commit97ae5095f52100b98170ab476e516d2be5b2c297 (patch)
tree5d67c444603f183d86c69b19a8bfeb24dd6e6264 /compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
parentc41669970a181b07ecf57c4607e50706f5d1e0c8 (diff)
downloadrust-97ae5095f52100b98170ab476e516d2be5b2c297.tar.gz
rust-97ae5095f52100b98170ab476e516d2be5b2c297.zip
Add simd_masked_{load,store} platform-intrinsics
This maps to the LLVM intrinsics: llvm.masked.load and llvm.masked.store
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs')
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs52
1 files changed, 51 insertions, 1 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index 0bd211fd614..5997e6026b4 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -1,5 +1,6 @@
 //! Codegen `extern "platform-intrinsic"` intrinsics.
 
+use cranelift_codegen::ir::immediates::Offset32;
 use rustc_middle::ty::GenericArgsRef;
 use rustc_span::Symbol;
 use rustc_target::abi::Endian;
@@ -1008,8 +1009,57 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             }
         }
 
+        sym::simd_masked_load => {
+            intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);
+
+            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, mask_lane_count);
+            assert_eq!(val_lane_count, ret_lane_count);
+
+            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+            let ptr_val = ptr.load_scalar(fx);
+
+            for lane_idx in 0..ret_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let if_disabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+                fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
+                fx.bcx.seal_block(if_enabled);
+                fx.bcx.seal_block(if_disabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32;
+                let res = fx.bcx.ins().load(
+                    lane_clif_ty,
+                    MemFlags::trusted(),
+                    ptr_val,
+                    Offset32::new(offset),
+                );
+                fx.bcx.ins().jump(next, &[res]);
+
+                fx.bcx.switch_to_block(if_disabled);
+                fx.bcx.ins().jump(next, &[val_lane]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+
+                fx.bcx.ins().nop();
+
+                ret.place_lane(fx, lane_idx)
+                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+            }
+        }
+
         sym::simd_scatter => {
-            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+            intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);
 
             let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
             let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);