about summary refs log tree commit diff
path: root/compiler/rustc_codegen_cranelift/src
diff options
context:
space:
mode:
authorbjorn3 <17426603+bjorn3@users.noreply.github.com>2023-06-15 17:56:01 +0000
committerbjorn3 <17426603+bjorn3@users.noreply.github.com>2023-06-15 17:56:01 +0000
commit82b497286d209b50f939ecb4f16dc1e72fcfda95 (patch)
treefc39aff3da7ab89bad516b92950ce3ea1b67336f /compiler/rustc_codegen_cranelift/src
parentf9097f87c9c094f80826fb60a1a624b5f9f1ed82 (diff)
parent8830dccd1d4c74f1f69b0d3bd982a3f1fcde5807 (diff)
downloadrust-82b497286d209b50f939ecb4f16dc1e72fcfda95.tar.gz
rust-82b497286d209b50f939ecb4f16dc1e72fcfda95.zip
Merge commit '8830dccd1d4c74f1f69b0d3bd982a3f1fcde5807' into sync_cg_clif-2023-06-15
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
-rw-r--r--compiler/rustc_codegen_cranelift/src/allocator.rs16
-rw-r--r--compiler/rustc_codegen_cranelift/src/base.rs1
-rw-r--r--compiler/rustc_codegen_cranelift/src/common.rs6
-rw-r--r--compiler/rustc_codegen_cranelift/src/config.rs5
-rw-r--r--compiler/rustc_codegen_cranelift/src/constant.rs30
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/jit.rs9
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs198
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs138
-rw-r--r--compiler/rustc_codegen_cranelift/src/lib.rs2
-rw-r--r--compiler/rustc_codegen_cranelift/src/trap.rs4
-rw-r--r--compiler/rustc_codegen_cranelift/src/value_and_place.rs21
11 files changed, 384 insertions, 46 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/allocator.rs b/compiler/rustc_codegen_cranelift/src/allocator.rs
index d4b1ae2b613..e92280b26b0 100644
--- a/compiler/rustc_codegen_cranelift/src/allocator.rs
+++ b/compiler/rustc_codegen_cranelift/src/allocator.rs
@@ -89,16 +89,16 @@ fn codegen_inner(
     );
 
     let data_id = module.declare_data(OomStrategy::SYMBOL, Linkage::Export, false, false).unwrap();
-    let mut data_ctx = DataContext::new();
-    data_ctx.set_align(1);
+    let mut data = DataDescription::new();
+    data.set_align(1);
     let val = oom_strategy.should_panic();
-    data_ctx.define(Box::new([val]));
-    module.define_data(data_id, &data_ctx).unwrap();
+    data.define(Box::new([val]));
+    module.define_data(data_id, &data).unwrap();
 
     let data_id =
         module.declare_data(NO_ALLOC_SHIM_IS_UNSTABLE, Linkage::Export, false, false).unwrap();
-    let mut data_ctx = DataContext::new();
-    data_ctx.set_align(1);
-    data_ctx.define(Box::new([0]));
-    module.define_data(data_id, &data_ctx).unwrap();
+    let mut data = DataDescription::new();
+    data.set_align(1);
+    data.define(Box::new([0]));
+    module.define_data(data_id, &data).unwrap();
 }
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index fcfa0b862d4..5abb4644e1b 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -156,6 +156,7 @@ pub(crate) fn compile_fn(
             write!(clif, " {}", isa_flag).unwrap();
         }
         writeln!(clif, "\n").unwrap();
+        writeln!(clif, "; symbol {}", codegened_func.symbol_name).unwrap();
         crate::PrintOnPanic(move || {
             let mut clif = clif.clone();
             ::cranelift_codegen::write::decorate_function(
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
index 7243cf6da23..a694bb26afb 100644
--- a/compiler/rustc_codegen_cranelift/src/common.rs
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -455,12 +455,12 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
     }
 
     pub(crate) fn anonymous_str(&mut self, msg: &str) -> Value {
-        let mut data_ctx = DataContext::new();
-        data_ctx.define(msg.as_bytes().to_vec().into_boxed_slice());
+        let mut data = DataDescription::new();
+        data.define(msg.as_bytes().to_vec().into_boxed_slice());
         let msg_id = self.module.declare_anonymous_data(false, false).unwrap();
 
         // Ignore DuplicateDefinition error, as the data will be the same
-        let _ = self.module.define_data(msg_id, &data_ctx);
+        let _ = self.module.define_data(msg_id, &data);
 
         let local_msg_id = self.module.declare_data_in_func(msg_id, self.bcx.func);
         if self.clif_comments.enabled() {
diff --git a/compiler/rustc_codegen_cranelift/src/config.rs b/compiler/rustc_codegen_cranelift/src/config.rs
index 263401e1c4b..9e92d656c76 100644
--- a/compiler/rustc_codegen_cranelift/src/config.rs
+++ b/compiler/rustc_codegen_cranelift/src/config.rs
@@ -82,6 +82,11 @@ impl BackendConfig {
 
         let mut config = BackendConfig::default();
         for opt in opts {
+            if opt.starts_with("-import-instr-limit") {
+                // Silently ignore -import-instr-limit. It is set by rust's build system even when
+                // testing cg_clif.
+                continue;
+            }
             if let Some((name, value)) = opt.split_once('=') {
                 match name {
                     "mode" => config.codegen_mode = value.parse()?,
diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs
index 77af561a587..427340c333e 100644
--- a/compiler/rustc_codegen_cranelift/src/constant.rs
+++ b/compiler/rustc_codegen_cranelift/src/constant.rs
@@ -324,12 +324,12 @@ fn data_id_for_static(
 
         let ref_name = format!("_rust_extern_with_linkage_{}", symbol_name);
         let ref_data_id = module.declare_data(&ref_name, Linkage::Local, false, false).unwrap();
-        let mut data_ctx = DataContext::new();
-        data_ctx.set_align(align);
-        let data = module.declare_data_in_data(data_id, &mut data_ctx);
-        data_ctx.define(std::iter::repeat(0).take(pointer_ty(tcx).bytes() as usize).collect());
-        data_ctx.write_data_addr(0, data, 0);
-        match module.define_data(ref_data_id, &data_ctx) {
+        let mut data = DataDescription::new();
+        data.set_align(align);
+        let data_gv = module.declare_data_in_data(data_id, &mut data);
+        data.define(std::iter::repeat(0).take(pointer_ty(tcx).bytes() as usize).collect());
+        data.write_data_addr(0, data_gv, 0);
+        match module.define_data(ref_data_id, &data) {
             // Every time the static is referenced there will be another definition of this global,
             // so duplicate definitions are expected and allowed.
             Err(ModuleError::DuplicateDefinition(_)) => {}
@@ -394,9 +394,9 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
             continue;
         }
 
-        let mut data_ctx = DataContext::new();
+        let mut data = DataDescription::new();
         let alloc = alloc.inner();
-        data_ctx.set_align(alloc.align.bytes());
+        data.set_align(alloc.align.bytes());
 
         if let Some(section_name) = section_name {
             let (segment_name, section_name) = if tcx.sess.target.is_like_osx {
@@ -412,11 +412,11 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
             } else {
                 ("", section_name.as_str())
             };
-            data_ctx.set_segment_section(segment_name, section_name);
+            data.set_segment_section(segment_name, section_name);
         }
 
         let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
-        data_ctx.define(bytes.into_boxed_slice());
+        data.define(bytes.into_boxed_slice());
 
         for &(offset, alloc_id) in alloc.provenance().ptrs().iter() {
             let addend = {
@@ -435,8 +435,8 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
                     assert_eq!(addend, 0);
                     let func_id =
                         crate::abi::import_function(tcx, module, instance.polymorphize(tcx));
-                    let local_func_id = module.declare_func_in_data(func_id, &mut data_ctx);
-                    data_ctx.write_function_addr(offset.bytes() as u32, local_func_id);
+                    let local_func_id = module.declare_func_in_data(func_id, &mut data);
+                    data.write_function_addr(offset.bytes() as u32, local_func_id);
                     continue;
                 }
                 GlobalAlloc::Memory(target_alloc) => {
@@ -462,11 +462,11 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
                 }
             };
 
-            let global_value = module.declare_data_in_data(data_id, &mut data_ctx);
-            data_ctx.write_data_addr(offset.bytes() as u32, global_value, addend as i64);
+            let global_value = module.declare_data_in_data(data_id, &mut data);
+            data.write_data_addr(offset.bytes() as u32, global_value, addend as i64);
         }
 
-        module.define_data(data_id, &data_ctx).unwrap();
+        module.define_data(data_id, &data).unwrap();
         cx.done.insert(data_id);
     }
 
diff --git a/compiler/rustc_codegen_cranelift/src/driver/jit.rs b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
index 3118105a4e2..41e24acefbe 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/jit.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
@@ -4,7 +4,7 @@
 use std::cell::RefCell;
 use std::ffi::CString;
 use std::os::raw::{c_char, c_int};
-use std::sync::{mpsc, Mutex};
+use std::sync::{mpsc, Mutex, OnceLock};
 
 use rustc_codegen_ssa::CrateInfo;
 use rustc_middle::mir::mono::MonoItem;
@@ -13,9 +13,6 @@ use rustc_span::Symbol;
 
 use cranelift_jit::{JITBuilder, JITModule};
 
-// FIXME use std::sync::OnceLock once it stabilizes
-use once_cell::sync::OnceCell;
-
 use crate::{prelude::*, BackendConfig};
 use crate::{CodegenCx, CodegenMode};
 
@@ -29,7 +26,7 @@ thread_local! {
 }
 
 /// The Sender owned by the rustc thread
-static GLOBAL_MESSAGE_SENDER: OnceCell<Mutex<mpsc::Sender<UnsafeMessage>>> = OnceCell::new();
+static GLOBAL_MESSAGE_SENDER: OnceLock<Mutex<mpsc::Sender<UnsafeMessage>>> = OnceLock::new();
 
 /// A message that is sent from the jitted runtime to the rustc thread.
 /// Senders are responsible for upholding `Send` semantics.
@@ -325,7 +322,7 @@ fn dep_symbol_lookup_fn(
             Linkage::NotLinked | Linkage::IncludedFromDylib => {}
             Linkage::Static => {
                 let name = crate_info.crate_name[&cnum];
-                let mut err = sess.struct_err(&format!("Can't load static lib {}", name));
+                let mut err = sess.struct_err(format!("Can't load static lib {}", name));
                 err.note("rustc_codegen_cranelift can only load dylibs in JIT mode.");
                 err.emit();
             }
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
index 56d8f13cec5..bbd5f4be783 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -19,7 +19,10 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
         }
 
         // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
-        "llvm.x86.sse2.pmovmskb.128" | "llvm.x86.avx2.pmovmskb" | "llvm.x86.sse2.movmsk.pd" => {
+        "llvm.x86.sse2.pmovmskb.128"
+        | "llvm.x86.avx2.pmovmskb"
+        | "llvm.x86.sse.movmsk.ps"
+        | "llvm.x86.sse2.movmsk.pd" => {
             intrinsic_args!(fx, args => (a); intrinsic);
 
             let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
@@ -107,16 +110,209 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             };
             let a = codegen_operand(fx, a);
             let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+                .expect("llvm.x86.sse2.pslli.d imm8 not const");
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+                .try_to_bits(Size::from_bytes(4))
+                .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+            {
+                imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
+                _ => fx.bcx.ins().iconst(types::I32, 0),
+            });
+        }
+        "llvm.x86.sse2.psrli.w" => {
+            let (a, imm8) = match args {
+                [a, imm8] => (a, imm8),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
                 .expect("llvm.x86.sse2.psrli.d imm8 not const");
 
             simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
                 .try_to_bits(Size::from_bytes(4))
                 .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
             {
+                imm8 if imm8 < 16 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
+                _ => fx.bcx.ins().iconst(types::I32, 0),
+            });
+        }
+        "llvm.x86.sse2.pslli.w" => {
+            let (a, imm8) = match args {
+                [a, imm8] => (a, imm8),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+                .expect("llvm.x86.sse2.pslli.d imm8 not const");
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+                .try_to_bits(Size::from_bytes(4))
+                .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+            {
+                imm8 if imm8 < 16 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
+                _ => fx.bcx.ins().iconst(types::I32, 0),
+            });
+        }
+        "llvm.x86.avx.psrli.d" => {
+            let (a, imm8) = match args {
+                [a, imm8] => (a, imm8),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+                .expect("llvm.x86.avx.psrli.d imm8 not const");
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+                .try_to_bits(Size::from_bytes(4))
+                .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+            {
+                imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
+                _ => fx.bcx.ins().iconst(types::I32, 0),
+            });
+        }
+        "llvm.x86.avx.pslli.d" => {
+            let (a, imm8) = match args {
+                [a, imm8] => (a, imm8),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+                .expect("llvm.x86.avx.pslli.d imm8 not const");
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+                .try_to_bits(Size::from_bytes(4))
+                .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+            {
                 imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
                 _ => fx.bcx.ins().iconst(types::I32, 0),
             });
         }
+        "llvm.x86.avx2.psrli.w" => {
+            let (a, imm8) = match args {
+                [a, imm8] => (a, imm8),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+                .expect("llvm.x86.avx.psrli.w imm8 not const");
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+                .try_to_bits(Size::from_bytes(4))
+                .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+            {
+                imm8 if imm8 < 16 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)),
+                _ => fx.bcx.ins().iconst(types::I32, 0),
+            });
+        }
+        "llvm.x86.avx2.pslli.w" => {
+            let (a, imm8) = match args {
+                [a, imm8] => (a, imm8),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8)
+                .expect("llvm.x86.avx.pslli.w imm8 not const");
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8
+                .try_to_bits(Size::from_bytes(4))
+                .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8))
+            {
+                imm8 if imm8 < 16 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)),
+                _ => fx.bcx.ins().iconst(types::I32, 0),
+            });
+        }
+        "llvm.x86.ssse3.pshuf.b.128" | "llvm.x86.avx2.pshuf.b" => {
+            let (a, b) = match args {
+                [a, b] => (a, b),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let b = codegen_operand(fx, b);
+
+            // Based on the pseudocode at https://github.com/rust-lang/stdarch/blob/1cfbca8b38fd9b4282b2f054f61c6ca69fc7ce29/crates/core_arch/src/x86/avx2.rs#L2319-L2332
+            let zero = fx.bcx.ins().iconst(types::I8, 0);
+            for i in 0..16 {
+                let b_lane = b.value_lane(fx, i).load_scalar(fx);
+                let is_zero = fx.bcx.ins().band_imm(b_lane, 0x80);
+                let a_idx = fx.bcx.ins().band_imm(b_lane, 0xf);
+                let a_idx = fx.bcx.ins().uextend(fx.pointer_type, a_idx);
+                let a_lane = a.value_lane_dyn(fx, a_idx).load_scalar(fx);
+                let res = fx.bcx.ins().select(is_zero, zero, a_lane);
+                ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+            }
+
+            if intrinsic == "llvm.x86.avx2.pshuf.b" {
+                for i in 16..32 {
+                    let b_lane = b.value_lane(fx, i).load_scalar(fx);
+                    let is_zero = fx.bcx.ins().band_imm(b_lane, 0x80);
+                    let b_lane_masked = fx.bcx.ins().band_imm(b_lane, 0xf);
+                    let a_idx = fx.bcx.ins().iadd_imm(b_lane_masked, 16);
+                    let a_idx = fx.bcx.ins().uextend(fx.pointer_type, a_idx);
+                    let a_lane = a.value_lane_dyn(fx, a_idx).load_scalar(fx);
+                    let res = fx.bcx.ins().select(is_zero, zero, a_lane);
+                    ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+                }
+            }
+        }
+        "llvm.x86.avx2.vperm2i128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256
+            let (a, b, imm8) = match args {
+                [a, b, imm8] => (a, b, imm8),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let b = codegen_operand(fx, b);
+            let imm8 = codegen_operand(fx, imm8).load_scalar(fx);
+
+            let a_0 = a.value_lane(fx, 0).load_scalar(fx);
+            let a_1 = a.value_lane(fx, 1).load_scalar(fx);
+            let a_low = fx.bcx.ins().iconcat(a_0, a_1);
+            let a_2 = a.value_lane(fx, 2).load_scalar(fx);
+            let a_3 = a.value_lane(fx, 3).load_scalar(fx);
+            let a_high = fx.bcx.ins().iconcat(a_2, a_3);
+
+            let b_0 = b.value_lane(fx, 0).load_scalar(fx);
+            let b_1 = b.value_lane(fx, 1).load_scalar(fx);
+            let b_low = fx.bcx.ins().iconcat(b_0, b_1);
+            let b_2 = b.value_lane(fx, 2).load_scalar(fx);
+            let b_3 = b.value_lane(fx, 3).load_scalar(fx);
+            let b_high = fx.bcx.ins().iconcat(b_2, b_3);
+
+            fn select4(
+                fx: &mut FunctionCx<'_, '_, '_>,
+                a_high: Value,
+                a_low: Value,
+                b_high: Value,
+                b_low: Value,
+                control: Value,
+            ) -> Value {
+                let a_or_b = fx.bcx.ins().band_imm(control, 0b0010);
+                let high_or_low = fx.bcx.ins().band_imm(control, 0b0001);
+                let is_zero = fx.bcx.ins().band_imm(control, 0b1000);
+
+                let zero = fx.bcx.ins().iconst(types::I64, 0);
+                let zero = fx.bcx.ins().iconcat(zero, zero);
+
+                let res_a = fx.bcx.ins().select(high_or_low, a_high, a_low);
+                let res_b = fx.bcx.ins().select(high_or_low, b_high, b_low);
+                let res = fx.bcx.ins().select(a_or_b, res_b, res_a);
+                fx.bcx.ins().select(is_zero, zero, res)
+            }
+
+            let control0 = imm8;
+            let res_low = select4(fx, a_high, a_low, b_high, b_low, control0);
+            let (res_0, res_1) = fx.bcx.ins().isplit(res_low);
+
+            let control1 = fx.bcx.ins().ushr_imm(imm8, 4);
+            let res_high = select4(fx, a_high, a_low, b_high, b_low, control1);
+            let (res_2, res_3) = fx.bcx.ins().isplit(res_high);
+
+            ret.place_lane(fx, 0).to_ptr().store(fx, res_0, MemFlags::trusted());
+            ret.place_lane(fx, 1).to_ptr().store(fx, res_1, MemFlags::trusted());
+            ret.place_lane(fx, 2).to_ptr().store(fx, res_2, MemFlags::trusted());
+            ret.place_lane(fx, 3).to_ptr().store(fx, res_3, MemFlags::trusted());
+        }
         "llvm.x86.sse2.storeu.dq" => {
             intrinsic_args!(fx, args => (mem_addr, a); intrinsic);
             let mem_addr = mem_addr.load_scalar(fx);
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index 5a038bfca5d..6741362e8b6 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -434,8 +434,36 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         }
 
-        sym::simd_round => {
-            intrinsic_args!(fx, args => (a); intrinsic);
+        sym::simd_fpow => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+
+            simd_pair_for_each_lane(fx, a, b, ret, &|fx, lane_ty, _ret_lane_ty, a_lane, b_lane| {
+                match lane_ty.kind() {
+                    ty::Float(FloatTy::F32) => fx.lib_call(
+                        "powf",
+                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
+                        vec![AbiParam::new(types::F32)],
+                        &[a_lane, b_lane],
+                    )[0],
+                    ty::Float(FloatTy::F64) => fx.lib_call(
+                        "pow",
+                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
+                        vec![AbiParam::new(types::F64)],
+                        &[a_lane, b_lane],
+                    )[0],
+                    _ => unreachable!("{:?}", lane_ty),
+                }
+            });
+        }
+
+        sym::simd_fpowi => {
+            intrinsic_args!(fx, args => (a, exp); intrinsic);
+            let exp = exp.load_scalar(fx);
 
             if !a.layout().ty.is_simd() {
                 report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
@@ -448,22 +476,71 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                 ret,
                 &|fx, lane_ty, _ret_lane_ty, lane| match lane_ty.kind() {
                     ty::Float(FloatTy::F32) => fx.lib_call(
-                        "roundf",
+                        "__powisf2", // compiler-builtins
+                        vec![AbiParam::new(types::F32), AbiParam::new(types::I32)],
                         vec![AbiParam::new(types::F32)],
-                        vec![AbiParam::new(types::F32)],
-                        &[lane],
+                        &[lane, exp],
                     )[0],
                     ty::Float(FloatTy::F64) => fx.lib_call(
-                        "round",
-                        vec![AbiParam::new(types::F64)],
+                        "__powidf2", // compiler-builtins
+                        vec![AbiParam::new(types::F64), AbiParam::new(types::I32)],
                         vec![AbiParam::new(types::F64)],
-                        &[lane],
+                        &[lane, exp],
                     )[0],
                     _ => unreachable!("{:?}", lane_ty),
                 },
             );
         }
 
+        sym::simd_fsin
+        | sym::simd_fcos
+        | sym::simd_fexp
+        | sym::simd_fexp2
+        | sym::simd_flog
+        | sym::simd_flog10
+        | sym::simd_flog2
+        | sym::simd_round => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+
+            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
+                let lane_ty = match lane_ty.kind() {
+                    ty::Float(FloatTy::F32) => types::F32,
+                    ty::Float(FloatTy::F64) => types::F64,
+                    _ => unreachable!("{:?}", lane_ty),
+                };
+                let name = match (intrinsic, lane_ty) {
+                    (sym::simd_fsin, types::F32) => "sinf",
+                    (sym::simd_fsin, types::F64) => "sin",
+                    (sym::simd_fcos, types::F32) => "cosf",
+                    (sym::simd_fcos, types::F64) => "cos",
+                    (sym::simd_fexp, types::F32) => "expf",
+                    (sym::simd_fexp, types::F64) => "exp",
+                    (sym::simd_fexp2, types::F32) => "exp2f",
+                    (sym::simd_fexp2, types::F64) => "exp2",
+                    (sym::simd_flog, types::F32) => "logf",
+                    (sym::simd_flog, types::F64) => "log",
+                    (sym::simd_flog10, types::F32) => "log10f",
+                    (sym::simd_flog10, types::F64) => "log10",
+                    (sym::simd_flog2, types::F32) => "log2f",
+                    (sym::simd_flog2, types::F64) => "log2",
+                    (sym::simd_round, types::F32) => "roundf",
+                    (sym::simd_round, types::F64) => "round",
+                    _ => unreachable!("{:?}", intrinsic),
+                };
+                fx.lib_call(
+                    name,
+                    vec![AbiParam::new(lane_ty)],
+                    vec![AbiParam::new(lane_ty)],
+                    &[lane],
+                )[0]
+            });
+        }
+
         sym::simd_fabs | sym::simd_fsqrt | sym::simd_ceil | sym::simd_floor | sym::simd_trunc => {
             intrinsic_args!(fx, args => (a); intrinsic);
 
@@ -488,7 +565,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         }
 
-        sym::simd_reduce_add_ordered | sym::simd_reduce_add_unordered => {
+        sym::simd_reduce_add_ordered => {
             intrinsic_args!(fx, args => (v, acc); intrinsic);
             let acc = acc.load_scalar(fx);
 
@@ -507,7 +584,25 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         }
 
-        sym::simd_reduce_mul_ordered | sym::simd_reduce_mul_unordered => {
+        sym::simd_reduce_add_unordered => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            // FIXME there must be no acc param for integer vectors
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
+                if lane_ty.is_floating_point() {
+                    fx.bcx.ins().fadd(a, b)
+                } else {
+                    fx.bcx.ins().iadd(a, b)
+                }
+            });
+        }
+
+        sym::simd_reduce_mul_ordered => {
             intrinsic_args!(fx, args => (v, acc); intrinsic);
             let acc = acc.load_scalar(fx);
 
@@ -526,6 +621,24 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         }
 
+        sym::simd_reduce_mul_unordered => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            // FIXME there must be no acc param for integer vectors
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
+                if lane_ty.is_floating_point() {
+                    fx.bcx.ins().fmul(a, b)
+                } else {
+                    fx.bcx.ins().imul(a, b)
+                }
+            });
+        }
+
         sym::simd_reduce_all => {
             intrinsic_args!(fx, args => (v); intrinsic);
 
@@ -581,7 +694,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
         }
 
-        sym::simd_reduce_min => {
+        sym::simd_reduce_min | sym::simd_reduce_min_nanless => {
             intrinsic_args!(fx, args => (v); intrinsic);
 
             if !v.layout().ty.is_simd() {
@@ -600,7 +713,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         }
 
-        sym::simd_reduce_max => {
+        sym::simd_reduce_max | sym::simd_reduce_max_nanless => {
             intrinsic_args!(fx, args => (v); intrinsic);
 
             if !v.layout().ty.is_simd() {
@@ -878,6 +991,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             fx.tcx.sess.span_err(span, format!("Unknown SIMD intrinsic {}", intrinsic));
             // Prevent verifier error
             fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+            return;
         }
     }
     let ret_block = fx.get_block(target);
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
index 095fbe62c19..0de2dccda71 100644
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -102,7 +102,7 @@ mod prelude {
     pub(crate) use cranelift_codegen::isa::{self, CallConv};
     pub(crate) use cranelift_codegen::Context;
     pub(crate) use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable};
-    pub(crate) use cranelift_module::{self, DataContext, FuncId, Linkage, Module};
+    pub(crate) use cranelift_module::{self, DataDescription, FuncId, Linkage, Module};
 
     pub(crate) use crate::abi::*;
     pub(crate) use crate::base::{codegen_operand, codegen_place};
diff --git a/compiler/rustc_codegen_cranelift/src/trap.rs b/compiler/rustc_codegen_cranelift/src/trap.rs
index 82a2ec57954..2fb0c2164c3 100644
--- a/compiler/rustc_codegen_cranelift/src/trap.rs
+++ b/compiler/rustc_codegen_cranelift/src/trap.rs
@@ -30,5 +30,9 @@ fn codegen_print(fx: &mut FunctionCx<'_, '_, '_>, msg: &str) {
 /// Trap code: user65535
 pub(crate) fn trap_unimplemented(fx: &mut FunctionCx<'_, '_, '_>, msg: impl AsRef<str>) {
     codegen_print(fx, msg.as_ref());
+
+    let one = fx.bcx.ins().iconst(types::I32, 1);
+    fx.lib_call("exit", vec![AbiParam::new(types::I32)], vec![], &[one]);
+
     fx.bcx.ins().trap(TrapCode::User(!0));
 }
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index b1fda6ff213..133c989b686 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -258,6 +258,27 @@ impl<'tcx> CValue<'tcx> {
         }
     }
 
+    /// Like [`CValue::value_lane`] except allowing a dynamically calculated lane index.
+    pub(crate) fn value_lane_dyn(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: Value,
+    ) -> CValue<'tcx> {
+        let layout = self.1;
+        assert!(layout.ty.is_simd());
+        let (_lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        match self.0 {
+            CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
+            CValueInner::ByRef(ptr, None) => {
+                let field_offset = fx.bcx.ins().imul_imm(lane_idx, lane_layout.size.bytes() as i64);
+                let field_ptr = ptr.offset_value(fx, field_offset);
+                CValue::by_ref(field_ptr, lane_layout)
+            }
+            CValueInner::ByRef(_, Some(_)) => unreachable!(),
+        }
+    }
+
     /// If `ty` is signed, `const_val` must already be sign extended.
     pub(crate) fn const_val(
         fx: &mut FunctionCx<'_, '_, 'tcx>,