about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbjorn3 <17426603+bjorn3@users.noreply.github.com>2023-10-24 12:22:23 +0000
committerbjorn3 <17426603+bjorn3@users.noreply.github.com>2023-10-24 12:22:23 +0000
commit484bc7fc8885cc580c50be4c74b800ff82451613 (patch)
tree9b93e95dc00b6d9fb21ee61a94976c4c0bfe8b50
parent271dcc1d40b57ad129d88fef1aa7f239308fbfb4 (diff)
parent93a5433f17ab5ed48cc88f1e69b0713b16183373 (diff)
downloadrust-484bc7fc8885cc580c50be4c74b800ff82451613.tar.gz
rust-484bc7fc8885cc580c50be4c74b800ff82451613.zip
Merge commit '93a5433f17ab5ed48cc88f1e69b0713b16183373' into sync_cg_clif-2023-10-24
-rw-r--r--compiler/rustc_codegen_cranelift/.vscode/settings.json2
-rw-r--r--compiler/rustc_codegen_cranelift/Cargo.lock52
-rw-r--r--compiler/rustc_codegen_cranelift/Cargo.toml12
-rw-r--r--compiler/rustc_codegen_cranelift/Readme.md6
-rw-r--r--compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs11
-rw-r--r--compiler/rustc_codegen_cranelift/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch25
-rw-r--r--compiler/rustc_codegen_cranelift/src/abi/mod.rs35
-rw-r--r--compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs11
-rw-r--r--compiler/rustc_codegen_cranelift/src/cast.rs6
-rw-r--r--compiler/rustc_codegen_cranelift/src/common.rs19
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/aot.rs23
-rw-r--r--compiler/rustc_codegen_cranelift/src/inline_asm.rs22
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs137
-rw-r--r--compiler/rustc_codegen_cranelift/src/value_and_place.rs37
14 files changed, 260 insertions, 138 deletions
diff --git a/compiler/rustc_codegen_cranelift/.vscode/settings.json b/compiler/rustc_codegen_cranelift/.vscode/settings.json
index 60cb51d5663..834a1362caf 100644
--- a/compiler/rustc_codegen_cranelift/.vscode/settings.json
+++ b/compiler/rustc_codegen_cranelift/.vscode/settings.json
@@ -33,7 +33,7 @@
             ]
         },
         {
-            "sysroot_src": "./download/sysroot/sysroot_src/library",
+            "sysroot_src": "./build/stdlib/library",
             "crates": [
                 {
                     "root_module": "./example/std_example.rs",
diff --git a/compiler/rustc_codegen_cranelift/Cargo.lock b/compiler/rustc_codegen_cranelift/Cargo.lock
index 8079913cb0f..c716d501173 100644
--- a/compiler/rustc_codegen_cranelift/Cargo.lock
+++ b/compiler/rustc_codegen_cranelift/Cargo.lock
@@ -45,18 +45,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
 [[package]]
 name = "cranelift-bforest"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e5e1df0da8488dd03b34afc134ba84b754d61862cc465932a9e5d07952f661e"
+checksum = "c1512c3bb6b13018e7109fc3ac964bc87b329eaf3a77825d337558d0c7f6f1be"
 dependencies = [
  "cranelift-entity",
 ]
 
 [[package]]
 name = "cranelift-codegen"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77a17ca4e699a0aaf49a0c88f6311a864f321048aa63f6b787cab20eb5f93f10"
+checksum = "16cb8fb9220a6ea7a226705a273ab905309ee546267bdf34948d57932d7f0396"
 dependencies = [
  "bumpalo",
  "cranelift-bforest",
@@ -75,39 +75,39 @@ dependencies = [
 
 [[package]]
 name = "cranelift-codegen-meta"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "022f2793cdade1d37a1f755ac42938a3f832f533eac6cafc8b26b209544c3c06"
+checksum = "ab3a8d3b0d4745b183da5ea0792b13d79f5c23d6e69ac04761728e2532b56649"
 dependencies = [
  "cranelift-codegen-shared",
 ]
 
 [[package]]
 name = "cranelift-codegen-shared"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4d72dbb83c2ad788dec4ad0843070973cb48c35a3ca19b1e7437ac40834fd9c"
+checksum = "524141c8e68f2abc2043de4c2b31f6d9dd42432738c246431d0572a1422a4a84"
 
 [[package]]
 name = "cranelift-control"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae07cf26dcc90d546826d747ac63b6c40c916f34b03e92a6ae0422c28d771b8a"
+checksum = "97513b57c961c713789a03886a57b43e14ebcd204cbaa8ae50ca6c70a8e716b3"
 dependencies = [
  "arbitrary",
 ]
 
 [[package]]
 name = "cranelift-entity"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2fe6b7e49820893691aea497f36257e9d6f52061d8c4758d61d802d5f101a3d"
+checksum = "e3f23d3cf3afa7e45f239702612c76d87964f652a55e28d13ed6d7e20f3479dd"
 
 [[package]]
 name = "cranelift-frontend"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44f497576ca3674581581601b6a55ccc1b43447217648c880e5bce70db3cf659"
+checksum = "554cd4947ec9209b58bf9ae5bf83581b5ddf9128bd967208e334b504a57db54e"
 dependencies = [
  "cranelift-codegen",
  "log",
@@ -117,15 +117,15 @@ dependencies = [
 
 [[package]]
 name = "cranelift-isle"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b96aa02eac00fffee13b0cd37d17874ccdb3d5458983041accd825ef78ce6454"
+checksum = "6c1892a439696b6413cb54083806f5fd9fc431768b8de74864b3d9e8b93b124f"
 
 [[package]]
 name = "cranelift-jit"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1d6e0e308c873eefc185745a6b21daec2a10f7554c9fb67e334c2d7d756d979"
+checksum = "32209252fb38acaf1662ccd0397907bbe0e92bdb13b6ddbfd2f74e437f83e685"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
@@ -143,9 +143,9 @@ dependencies = [
 
 [[package]]
 name = "cranelift-module"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1aa8ebb06eced4e478c3f94f1d65d4e7c93493f4640057912b27a3e34b84841"
+checksum = "bf42656f5f6df7bfafc4dd7b63a1888b0627c07b43b2cb9aa54e13843fed39eb"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
@@ -154,9 +154,9 @@ dependencies = [
 
 [[package]]
 name = "cranelift-native"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2870170ca44054b202c737626607b87be6e35655084bd94a6ff807a5812ba7df"
+checksum = "e0c2d3badd4b9690865f5bb68a71fa94de592fa2df3f3d11a5a062c60c0a107a"
 dependencies = [
  "cranelift-codegen",
  "libc",
@@ -165,9 +165,9 @@ dependencies = [
 
 [[package]]
 name = "cranelift-object"
-version = "0.101.0"
+version = "0.101.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20647761742d17dabac8205da958910ede78599550e06418a16711a3ee2fc897"
+checksum = "88eca54bbecea3170035168357306e9c779d4a63d8bf036c9e16bd21fdaa69b5"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
@@ -374,9 +374,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
 
 [[package]]
 name = "wasmtime-jit-icache-coherence"
-version = "14.0.0"
+version = "14.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3a5dda53ad6993f9b0a2d65fb49e0348a7232a27a8794064122870d6ee19eb2"
+checksum = "9aaf2fa8fd2d6b65abae9b92edfe69254cc5d6b166e342364036c3e347de8da9"
 dependencies = [
  "cfg-if",
  "libc",
diff --git a/compiler/rustc_codegen_cranelift/Cargo.toml b/compiler/rustc_codegen_cranelift/Cargo.toml
index 5ad5e0e8140..f2ce714e8ff 100644
--- a/compiler/rustc_codegen_cranelift/Cargo.toml
+++ b/compiler/rustc_codegen_cranelift/Cargo.toml
@@ -8,12 +8,12 @@ crate-type = ["dylib"]
 
 [dependencies]
 # These have to be in sync with each other
-cranelift-codegen = { version = "0.101", features = ["unwind", "all-arch"] }
-cranelift-frontend = { version = "0.101" }
-cranelift-module = { version = "0.101" }
-cranelift-native = { version = "0.101" }
-cranelift-jit = { version = "0.101", optional = true }
-cranelift-object = { version = "0.101" }
+cranelift-codegen = { version = "0.101.1", features = ["unwind", "all-arch"] }
+cranelift-frontend = { version = "0.101.1" }
+cranelift-module = { version = "0.101.1" }
+cranelift-native = { version = "0.101.1" }
+cranelift-jit = { version = "0.101.1", optional = true }
+cranelift-object = { version = "0.101.1" }
 target-lexicon = "0.12.0"
 gimli = { version = "0.28", default-features = false, features = ["write"]}
 object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] }
diff --git a/compiler/rustc_codegen_cranelift/Readme.md b/compiler/rustc_codegen_cranelift/Readme.md
index 6f2027be96d..5664cbe7d4f 100644
--- a/compiler/rustc_codegen_cranelift/Readme.md
+++ b/compiler/rustc_codegen_cranelift/Readme.md
@@ -8,7 +8,7 @@ If not please open an issue.
 ## Building and testing
 
 ```bash
-$ git clone https://github.com/bjorn3/rustc_codegen_cranelift
+$ git clone https://github.com/rust-lang/rustc_codegen_cranelift
 $ cd rustc_codegen_cranelift
 $ ./y.sh prepare
 $ ./y.sh build
@@ -29,7 +29,7 @@ Extract the `dist` directory in the archive anywhere you want.
 If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`.
 (tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)).
 
-[releases]: https://github.com/bjorn3/rustc_codegen_cranelift/releases/tag/dev
+[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev
 
 ## Usage
 
@@ -78,7 +78,7 @@ configuration options.
 
 * Inline assembly ([no cranelift support](https://github.com/bytecodealliance/wasmtime/issues/1041))
     * On UNIX there is support for invoking an external assembler for `global_asm!` and `asm!`.
-* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported)
+* SIMD ([tracked here](https://github.com/rust-lang/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported)
 * Unwinding on panics ([no cranelift support](https://github.com/bytecodealliance/wasmtime/issues/1677), `-Cpanic=abort` is enabled by default)
 
 ## License
diff --git a/compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs b/compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs
index 91de04d9770..afc51a47f14 100644
--- a/compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs
+++ b/compiler/rustc_codegen_cranelift/example/mini_core_hello_world.rs
@@ -353,6 +353,17 @@ fn main() {
 
     let f = V([0.0, 1.0]);
     let _a = f.0[0];
+
+    stack_val_align();
+}
+
+#[inline(never)]
+fn stack_val_align() {
+    #[repr(align(8192))]
+    struct Foo(u8);
+
+    let a = Foo(0);
+    assert_eq!(&a as *const Foo as usize % 8192, 0);
 }
 
 #[cfg(all(
diff --git a/compiler/rustc_codegen_cranelift/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch b/compiler/rustc_codegen_cranelift/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch
deleted file mode 100644
index e6ebdcec783..00000000000
--- a/compiler/rustc_codegen_cranelift/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From 5d4afb8d807d181038b6a004d17ed055a8d191b2 Mon Sep 17 00:00:00 2001
-From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
-Date: Mon, 2 Oct 2023 13:59:00 +0000
-Subject: [PATCH] Ignore test which gets miscompiled with llvm sysroot
-
----
- regex-automata/src/util/pool.rs | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/regex-automata/src/util/pool.rs b/regex-automata/src/util/pool.rs
-index c03d7b0..28b233b 100644
---- a/regex-automata/src/util/pool.rs
-+++ b/regex-automata/src/util/pool.rs
-@@ -1081,6 +1081,8 @@ mod tests {
-     // into the pool. This in turn resulted in this test producing a data race.
-     #[cfg(feature = "std")]
-     #[test]
-+    // FIXME(rustc_codegen_cranelift#1395) miscompilation of thread::scope with LLVM sysroot
-+    #[ignore]
-     fn thread_owner_sync() {
-         let pool = Pool::new(|| vec!['a']);
-         {
--- 
-2.34.1
-
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
index c75ad852f82..c4572e03525 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -120,32 +120,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
         args: &[Value],
     ) -> Cow<'_, [Value]> {
         if self.tcx.sess.target.is_like_windows {
-            let (mut params, mut args): (Vec<_>, Vec<_>) =
-                params
-                    .into_iter()
-                    .zip(args)
-                    .map(|(param, &arg)| {
-                        if param.value_type == types::I128 {
-                            let arg_ptr = Pointer::stack_slot(self.bcx.create_sized_stack_slot(
-                                StackSlotData { kind: StackSlotKind::ExplicitSlot, size: 16 },
-                            ));
-                            arg_ptr.store(self, arg, MemFlags::trusted());
-                            (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self))
-                        } else {
-                            (param, arg)
-                        }
-                    })
-                    .unzip();
+            let (mut params, mut args): (Vec<_>, Vec<_>) = params
+                .into_iter()
+                .zip(args)
+                .map(|(param, &arg)| {
+                    if param.value_type == types::I128 {
+                        let arg_ptr = self.create_stack_slot(16, 16);
+                        arg_ptr.store(self, arg, MemFlags::trusted());
+                        (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self))
+                    } else {
+                        (param, arg)
+                    }
+                })
+                .unzip();
 
             let indirect_ret_val = returns.len() == 1 && returns[0].value_type == types::I128;
 
             if indirect_ret_val {
                 params.insert(0, AbiParam::new(self.pointer_type));
-                let ret_ptr =
-                    Pointer::stack_slot(self.bcx.create_sized_stack_slot(StackSlotData {
-                        kind: StackSlotKind::ExplicitSlot,
-                        size: 16,
-                    }));
+                let ret_ptr = self.create_stack_slot(16, 16);
                 args.insert(0, ret_ptr.get_addr(self));
                 self.lib_call_unadjusted(name, params, vec![], &args);
                 return Cow::Owned(vec![ret_ptr.load(self, types::I128, MemFlags::trusted())]);
diff --git a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
index 7c9f8c1051c..06522670029 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
@@ -189,16 +189,13 @@ pub(super) fn from_casted_value<'tcx>(
     let abi_params = cast_target_to_abi_params(cast);
     let abi_param_size: u32 = abi_params.iter().map(|param| param.value_type.bytes()).sum();
     let layout_size = u32::try_from(layout.size.bytes()).unwrap();
-    let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
-        kind: StackSlotKind::ExplicitSlot,
-        // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
-        // specify stack slot alignment.
+    let ptr = fx.create_stack_slot(
         // Stack slot size may be bigger for example `[u8; 3]` which is packed into an `i32`.
         // It may also be smaller for example when the type is a wrapper around an integer with a
         // larger alignment than the integer.
-        size: (std::cmp::max(abi_param_size, layout_size) + 15) / 16 * 16,
-    });
-    let ptr = Pointer::stack_slot(stack_slot);
+        std::cmp::max(abi_param_size, layout_size),
+        u32::try_from(layout.align.pref.bytes()).unwrap(),
+    );
     let mut offset = 0;
     let mut block_params_iter = block_params.iter().copied();
     for param in abi_params {
diff --git a/compiler/rustc_codegen_cranelift/src/cast.rs b/compiler/rustc_codegen_cranelift/src/cast.rs
index 7e027c5f1b3..0b5cb1547fc 100644
--- a/compiler/rustc_codegen_cranelift/src/cast.rs
+++ b/compiler/rustc_codegen_cranelift/src/cast.rs
@@ -104,11 +104,7 @@ pub(crate) fn clif_int_or_float_cast(
                     &[from],
                 )[0];
                 // FIXME(bytecodealliance/wasmtime#6104) use bitcast instead of store to get from i64x2 to i128
-                let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
-                    kind: StackSlotKind::ExplicitSlot,
-                    size: 16,
-                });
-                let ret_ptr = Pointer::stack_slot(stack_slot);
+                let ret_ptr = fx.create_stack_slot(16, 16);
                 ret_ptr.store(fx, ret, MemFlags::trusted());
                 ret_ptr.load(fx, types::I128, MemFlags::trusted())
             } else {
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
index 7a3ae6ebf52..9771f44f62c 100644
--- a/compiler/rustc_codegen_cranelift/src/common.rs
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -383,6 +383,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
         })
     }
 
+    pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer {
+        if align <= 16 {
+            let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
+                kind: StackSlotKind::ExplicitSlot,
+                // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
+                // specify stack slot alignment.
+                size: (size + 15) / 16 * 16,
+            });
+            Pointer::stack_slot(stack_slot)
+        } else {
+            // Alignment is too big to handle using the above hack. Dynamically realign a stack slot
+            // instead. This wastes some space for the realignment.
+            let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self);
+            let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align));
+            let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align));
+            Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset))
+        }
+    }
+
     pub(crate) fn set_debug_loc(&mut self, source_info: mir::SourceInfo) {
         if let Some(debug_context) = &mut self.cx.debug_context {
             let (file, line, column) =
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index d3a7decc543..11229dd421e 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -361,12 +361,26 @@ pub(crate) fn run_aot(
     metadata: EncodedMetadata,
     need_metadata_module: bool,
 ) -> Box<OngoingCodegen> {
+    // FIXME handle `-Ctarget-cpu=native`
+    let target_cpu = match tcx.sess.opts.cg.target_cpu {
+        Some(ref name) => name,
+        None => tcx.sess.target.cpu.as_ref(),
+    }
+    .to_owned();
+
     let cgus = if tcx.sess.opts.output_types.should_codegen() {
         tcx.collect_and_partition_mono_items(()).1
     } else {
         // If only `--emit metadata` is used, we shouldn't perform any codegen.
         // Also `tcx.collect_and_partition_mono_items` may panic in that case.
-        &[]
+        return Box::new(OngoingCodegen {
+            modules: vec![],
+            allocator_module: None,
+            metadata_module: None,
+            metadata,
+            crate_info: CrateInfo::new(tcx, target_cpu),
+            concurrency_limiter: ConcurrencyLimiter::new(tcx.sess, 0),
+        });
     };
 
     if tcx.dep_graph.is_fully_enabled() {
@@ -481,13 +495,6 @@ pub(crate) fn run_aot(
         None
     };
 
-    // FIXME handle `-Ctarget-cpu=native`
-    let target_cpu = match tcx.sess.opts.cg.target_cpu {
-        Some(ref name) => name,
-        None => tcx.sess.target.cpu.as_ref(),
-    }
-    .to_owned();
-
     Box::new(OngoingCodegen {
         modules,
         allocator_module,
diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
index ed077234254..0517c609337 100644
--- a/compiler/rustc_codegen_cranelift/src/inline_asm.rs
+++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
@@ -878,13 +878,7 @@ fn call_inline_asm<'tcx>(
     inputs: Vec<(Size, Value)>,
     outputs: Vec<(Size, CPlace<'tcx>)>,
 ) {
-    let stack_slot = fx.bcx.func.create_sized_stack_slot(StackSlotData {
-        kind: StackSlotKind::ExplicitSlot,
-        size: u32::try_from(slot_size.bytes()).unwrap(),
-    });
-    if fx.clif_comments.enabled() {
-        fx.add_comment(stack_slot, "inline asm scratch slot");
-    }
+    let stack_slot = fx.create_stack_slot(u32::try_from(slot_size.bytes()).unwrap(), 16);
 
     let inline_asm_func = fx
         .module
@@ -904,15 +898,23 @@ fn call_inline_asm<'tcx>(
     }
 
     for (offset, value) in inputs {
-        fx.bcx.ins().stack_store(value, stack_slot, i32::try_from(offset.bytes()).unwrap());
+        stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).store(
+            fx,
+            value,
+            MemFlags::trusted(),
+        );
     }
 
-    let stack_slot_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
+    let stack_slot_addr = stack_slot.get_addr(fx);
     fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
 
     for (offset, place) in outputs {
         let ty = fx.clif_type(place.layout().ty).unwrap();
-        let value = fx.bcx.ins().stack_load(ty, stack_slot, i32::try_from(offset.bytes()).unwrap());
+        let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
+            fx,
+            ty,
+            MemFlags::trusted(),
+        );
         place.write_cvalue(fx, CValue::by_val(value, place.layout()));
     }
 }
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
index 0c9a94e1c23..35f144d7dad 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -310,6 +310,143 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             let val = CValue::by_val_pair(cb_out, c, layout);
             ret.write_cvalue(fx, val);
         }
+        "llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            // FIXME use vector instructions when possible
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
+                    // (a + b + 1) >> 1
+                    let lane_ty = fx.bcx.func.dfg.value_type(a_lane);
+                    let a_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), a_lane);
+                    let b_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), b_lane);
+                    let sum = fx.bcx.ins().iadd(a_lane, b_lane);
+                    let num_plus_one = fx.bcx.ins().iadd_imm(sum, 1);
+                    let res = fx.bcx.ins().ushr_imm(num_plus_one, 1);
+                    fx.bcx.ins().ireduce(lane_ty, res)
+                },
+            );
+        }
+        "llvm.x86.sse2.psra.w" => {
+            intrinsic_args!(fx, args => (a, count); intrinsic);
+
+            let count_lane = count.force_stack(fx).0.load(fx, types::I64, MemFlags::trusted());
+            let lane_ty = fx.clif_type(a.layout().ty.simd_size_and_type(fx.tcx).1).unwrap();
+            let max_count = fx.bcx.ins().iconst(types::I64, i64::from(lane_ty.bits() - 1));
+            let saturated_count = fx.bcx.ins().umin(count_lane, max_count);
+
+            // FIXME use vector instructions when possible
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, a_lane| {
+                fx.bcx.ins().sshr(a_lane, saturated_count)
+            });
+        }
+        "llvm.x86.sse2.psad.bw" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.u8);
+            assert_eq!(ret_lane_ty, fx.tcx.types.u64);
+            assert_eq!(lane_count, ret_lane_count * 8);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.u64);
+            for out_lane_idx in 0..lane_count / 8 {
+                let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);
+
+                for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 {
+                    let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
+                    let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
+
+                    let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
+                    let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
+                    let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff);
+                    lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff);
+                }
+
+                let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
+        "llvm.x86.ssse3.pmadd.ub.sw.128" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.u8);
+            assert_eq!(ret_lane_ty, fx.tcx.types.i16);
+            assert_eq!(lane_count, ret_lane_count * 2);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
+            for out_lane_idx in 0..lane_count / 2 {
+                let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0);
+                let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0);
+
+                let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1);
+                let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1);
+
+                let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
+                let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
+
+                let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1);
+
+                let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0);
+
+                let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16));
+                let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16));
+
+                let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
+                let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val);
+
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
+        "llvm.x86.sse2.pmadd.wd" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.i16);
+            assert_eq!(ret_lane_ty, fx.tcx.types.i32);
+            assert_eq!(lane_count, ret_lane_count * 2);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
+            for out_lane_idx in 0..lane_count / 2 {
+                let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0);
+                let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);
+
+                let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1);
+                let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);
+
+                let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
+                let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
+
+                let res_lane = fx.bcx.ins().iadd(mul0, mul1);
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
         _ => {
             fx.tcx
                 .sess
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index 3a6a6c9e3f5..5f0aa6c5581 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -132,18 +132,11 @@ impl<'tcx> CValue<'tcx> {
                 (ptr.get_addr(fx), vtable)
             }
             CValueInner::ByValPair(data, vtable) => {
-                let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
-                    kind: StackSlotKind::ExplicitSlot,
-                    // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
-                    // specify stack slot alignment.
-                    size: (u32::try_from(fx.target_config.pointer_type().bytes()).unwrap() + 15)
-                        / 16
-                        * 16,
-                });
-                let data_ptr = Pointer::stack_slot(stack_slot);
-                let mut flags = MemFlags::new();
-                flags.set_notrap();
-                data_ptr.store(fx, data, flags);
+                let data_ptr = fx.create_stack_slot(
+                    u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(),
+                    u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(),
+                );
+                data_ptr.store(fx, data, MemFlags::trusted());
 
                 (data_ptr.get_addr(fx), vtable)
             }
@@ -372,13 +365,11 @@ impl<'tcx> CPlace<'tcx> {
                 .fatal(format!("values of type {} are too big to store on the stack", layout.ty));
         }
 
-        let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
-            kind: StackSlotKind::ExplicitSlot,
-            // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
-            // specify stack slot alignment.
-            size: (u32::try_from(layout.size.bytes()).unwrap() + 15) / 16 * 16,
-        });
-        CPlace { inner: CPlaceInner::Addr(Pointer::stack_slot(stack_slot), None), layout }
+        let stack_slot = fx.create_stack_slot(
+            u32::try_from(layout.size.bytes()).unwrap(),
+            u32::try_from(layout.align.pref.bytes()).unwrap(),
+        );
+        CPlace { inner: CPlaceInner::Addr(stack_slot, None), layout }
     }
 
     pub(crate) fn new_var(
@@ -543,13 +534,7 @@ impl<'tcx> CPlace<'tcx> {
                 _ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data),
                 _ if src_ty.is_vector() || dst_ty.is_vector() => {
                     // FIXME(bytecodealliance/wasmtime#6104) do something more efficient for transmutes between vectors and integers.
-                    let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
-                        kind: StackSlotKind::ExplicitSlot,
-                        // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
-                        // specify stack slot alignment.
-                        size: (src_ty.bytes() + 15) / 16 * 16,
-                    });
-                    let ptr = Pointer::stack_slot(stack_slot);
+                    let ptr = fx.create_stack_slot(src_ty.bytes(), src_ty.bytes());
                     ptr.store(fx, data, MemFlags::trusted());
                     ptr.load(fx, dst_ty, MemFlags::trusted())
                 }