author     bjorn3 <bjorn3@users.noreply.github.com>    2021-08-06 16:26:56 +0200
committer  bjorn3 <bjorn3@users.noreply.github.com>    2021-08-06 16:26:56 +0200
commit     279f486960edc43246ea413970f587a82365cca7 (patch)
tree       a15483e6de5d0c1c26b7a5d8839400f673f8b69a
parent     1f94abcda6884893d4723304102089198caa0839 (diff)
parent     05677b6bd6c938ed760835d9b1f6514992654ae3 (diff)
download   rust-279f486960edc43246ea413970f587a82365cca7.tar.gz
           rust-279f486960edc43246ea413970f587a82365cca7.zip

Merge commit '05677b6bd6c938ed760835d9b1f6514992654ae3' into sync_cg_clif-2021-08-06
-rw-r--r--  compiler/rustc_codegen_cranelift/.github/workflows/main.yml | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/.gitignore | 1
-rw-r--r--  compiler/rustc_codegen_cranelift/Cargo.lock | 48
-rw-r--r--  compiler/rustc_codegen_cranelift/Cargo.toml | 19
-rw-r--r--  compiler/rustc_codegen_cranelift/build_sysroot/Cargo.lock | 17
-rw-r--r--  compiler/rustc_codegen_cranelift/build_system/build_backend.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/build_system/build_sysroot.rs | 4
-rw-r--r--  compiler/rustc_codegen_cranelift/build_system/prepare.rs | 13
-rwxr-xr-x  compiler/rustc_codegen_cranelift/clean_all.sh | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/docs/usage.md | 4
-rw-r--r--  compiler/rustc_codegen_cranelift/patches/0001-stdsimd-Disable-unsupported-tests.patch | 165
-rw-r--r--  compiler/rustc_codegen_cranelift/patches/0022-sysroot-Disable-not-compiling-tests.patch | 8
-rw-r--r--  compiler/rustc_codegen_cranelift/patches/0023-sysroot-Ignore-failing-tests.patch | 40
-rw-r--r--  compiler/rustc_codegen_cranelift/patches/0027-sysroot-128bit-atomic-operations.patch | 64
-rw-r--r--  compiler/rustc_codegen_cranelift/rust-toolchain | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/scripts/cargo.rs | 12
-rwxr-xr-x  compiler/rustc_codegen_cranelift/scripts/filter_profile.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh | 2
-rwxr-xr-x  compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh | 1
-rwxr-xr-x  compiler/rustc_codegen_cranelift/scripts/tests.sh | 17
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/mod.rs | 230
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs | 20
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/returning.rs | 90
-rw-r--r--  compiler/rustc_codegen_cranelift/src/allocator.rs | 10
-rw-r--r--  compiler/rustc_codegen_cranelift/src/analyze.rs | 11
-rw-r--r--  compiler/rustc_codegen_cranelift/src/base.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/cast.rs | 19
-rw-r--r--  compiler/rustc_codegen_cranelift/src/codegen_i128.rs | 116
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs | 111
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs | 190
-rw-r--r--  compiler/rustc_codegen_cranelift/src/lib.rs | 26
-rw-r--r--  compiler/rustc_codegen_cranelift/src/num.rs | 25
-rw-r--r--  compiler/rustc_codegen_cranelift/src/optimize/peephole.rs | 41
-rw-r--r--  compiler/rustc_codegen_cranelift/src/trap.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/unsize.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/value_and_place.rs | 77
-rw-r--r--  compiler/rustc_codegen_cranelift/src/vtable.rs | 8
-rwxr-xr-x  compiler/rustc_codegen_cranelift/y.rs | 4
40 files changed, 823 insertions, 590 deletions
diff --git a/compiler/rustc_codegen_cranelift/.github/workflows/main.yml b/compiler/rustc_codegen_cranelift/.github/workflows/main.yml
index f81ac877260..f524b42c5ee 100644
--- a/compiler/rustc_codegen_cranelift/.github/workflows/main.yml
+++ b/compiler/rustc_codegen_cranelift/.github/workflows/main.yml
@@ -49,12 +49,14 @@ jobs:
     - name: Install MinGW toolchain and wine
       if: matrix.os == 'ubuntu-latest' && matrix.env.TARGET_TRIPLE == 'x86_64-pc-windows-gnu'
       run: |
+        sudo apt-get update
         sudo apt-get install -y gcc-mingw-w64-x86-64 wine-stable
         rustup target add x86_64-pc-windows-gnu
 
     - name: Install AArch64 toolchain and qemu
       if: matrix.os == 'ubuntu-latest' && matrix.env.TARGET_TRIPLE == 'aarch64-unknown-linux-gnu'
       run: |
+        sudo apt-get update
         sudo apt-get install -y gcc-aarch64-linux-gnu qemu-user
 
     - name: Prepare dependencies
diff --git a/compiler/rustc_codegen_cranelift/.gitignore b/compiler/rustc_codegen_cranelift/.gitignore
index 12e779fe7c7..25080488a88 100644
--- a/compiler/rustc_codegen_cranelift/.gitignore
+++ b/compiler/rustc_codegen_cranelift/.gitignore
@@ -15,3 +15,4 @@ perf.data.old
 /rand
 /regex
 /simple-raytracer
+/stdsimd
diff --git a/compiler/rustc_codegen_cranelift/Cargo.lock b/compiler/rustc_codegen_cranelift/Cargo.lock
index 56d0974b253..23c1fdc6ee4 100644
--- a/compiler/rustc_codegen_cranelift/Cargo.lock
+++ b/compiler/rustc_codegen_cranelift/Cargo.lock
@@ -4,9 +4,9 @@ version = 3
 
 [[package]]
 name = "anyhow"
-version = "1.0.38"
+version = "1.0.42"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1"
+checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486"
 
 [[package]]
 name = "ar"
@@ -34,7 +34,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 [[package]]
 name = "cranelift-bforest"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 dependencies = [
  "cranelift-entity",
 ]
@@ -42,7 +42,7 @@ dependencies = [
 [[package]]
 name = "cranelift-codegen"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 dependencies = [
  "cranelift-bforest",
  "cranelift-codegen-meta",
@@ -58,7 +58,7 @@ dependencies = [
 [[package]]
 name = "cranelift-codegen-meta"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 dependencies = [
  "cranelift-codegen-shared",
  "cranelift-entity",
@@ -67,17 +67,17 @@ dependencies = [
 [[package]]
 name = "cranelift-codegen-shared"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 
 [[package]]
 name = "cranelift-entity"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 
 [[package]]
 name = "cranelift-frontend"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 dependencies = [
  "cranelift-codegen",
  "log",
@@ -88,7 +88,7 @@ dependencies = [
 [[package]]
 name = "cranelift-jit"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
@@ -105,7 +105,7 @@ dependencies = [
 [[package]]
 name = "cranelift-module"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
@@ -116,7 +116,7 @@ dependencies = [
 [[package]]
 name = "cranelift-native"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 dependencies = [
  "cranelift-codegen",
  "libc",
@@ -126,7 +126,7 @@ dependencies = [
 [[package]]
 name = "cranelift-object"
 version = "0.75.0"
-source = "git+https://github.com/bytecodealliance/wasmtime.git?branch=main#c71ad9490e7f3e19bbcae7e28bbe50f8a0b4a5d8"
+source = "git+https://github.com/bytecodealliance/wasmtime.git#5deda279775dca5e37449c829cda1f6276d6542b"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
@@ -147,24 +147,24 @@ dependencies = [
 
 [[package]]
 name = "gimli"
-version = "0.24.0"
+version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e4075386626662786ddb0ec9081e7c7eeb1ba31951f447ca780ef9f5d568189"
+checksum = "f0a01e0497841a3b2db4f8afa483cce65f7e96a3498bd6c541734792aeac8fe7"
 dependencies = [
  "indexmap",
 ]
 
 [[package]]
 name = "hashbrown"
-version = "0.9.1"
+version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
+checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
 
 [[package]]
 name = "indexmap"
-version = "1.6.1"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fb1fa934250de4de8aef298d81c729a7d33d8c239daa3a7575e6b92bfc7313b"
+checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5"
 dependencies = [
  "autocfg",
  "hashbrown",
@@ -172,9 +172,9 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.97"
+version = "0.2.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6"
+checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
 
 [[package]]
 name = "libloading"
@@ -212,9 +212,9 @@ checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
 
 [[package]]
 name = "object"
-version = "0.25.3"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a38f2be3697a57b4060074ff41b44c16870d916ad7877c17696e063257482bc7"
+checksum = "c55827317fb4c08822499848a14237d2874d6f139828893017237e7ab93eb386"
 dependencies = [
  "crc32fast",
  "indexmap",
@@ -277,9 +277,9 @@ checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
 
 [[package]]
 name = "target-lexicon"
-version = "0.12.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64ae3b39281e4b14b8123bdbaddd472b7dfe215e444181f2f9d2443c2444f834"
+checksum = "b0652da4c4121005e9ed22b79f6c5f2d9e2752906b53a33e9490489ba421a6fb"
 
 [[package]]
 name = "winapi"
diff --git a/compiler/rustc_codegen_cranelift/Cargo.toml b/compiler/rustc_codegen_cranelift/Cargo.toml
index 6593ac738fe..6f40fc0fcb8 100644
--- a/compiler/rustc_codegen_cranelift/Cargo.toml
+++ b/compiler/rustc_codegen_cranelift/Cargo.toml
@@ -8,15 +8,15 @@ crate-type = ["dylib"]
 
 [dependencies]
 # These have to be in sync with each other
-cranelift-codegen = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "main", features = ["unwind", "all-arch"] }
-cranelift-frontend = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "main" }
-cranelift-module = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "main" }
-cranelift-native = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "main" }
-cranelift-jit = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "main", optional = true }
-cranelift-object = { git = "https://github.com/bytecodealliance/wasmtime.git", branch = "main" }
+cranelift-codegen = { git = "https://github.com/bytecodealliance/wasmtime.git", features = ["unwind", "all-arch"] }
+cranelift-frontend = { git = "https://github.com/bytecodealliance/wasmtime.git" }
+cranelift-module = { git = "https://github.com/bytecodealliance/wasmtime.git" }
+cranelift-native = { git = "https://github.com/bytecodealliance/wasmtime.git" }
+cranelift-jit = { git = "https://github.com/bytecodealliance/wasmtime.git", optional = true }
+cranelift-object = { git = "https://github.com/bytecodealliance/wasmtime.git" }
 target-lexicon = "0.12.0"
-gimli = { version = "0.24.0", default-features = false, features = ["write"]}
-object = { version = "0.25.0", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] }
+gimli = { version = "0.25.0", default-features = false, features = ["write"]}
+object = { version = "0.26.0", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] }
 
 ar = { git = "https://github.com/bjorn3/rust-ar.git", branch = "do_not_remove_cg_clif_ranlib" }
 indexmap = "1.0.2"
@@ -36,7 +36,8 @@ smallvec = "1.6.1"
 #gimli = { path = "../" }
 
 [features]
-default = ["jit", "inline_asm"]
+# Enable features not ready to be enabled when compiling as part of rustc
+unstable-features = ["jit", "inline_asm"]
 jit = ["cranelift-jit", "libloading"]
 inline_asm = []
 
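For context on the [features] change above: with the default feature set emptied, JIT support is only compiled into the backend when it is built with --features unstable-features, which is exactly what build_system/build_backend.rs now passes. A minimal, hypothetical sketch of the usual cfg-gating pattern this relies on (illustrative only, not code from this repository):

#[cfg(feature = "jit")]
mod jit {
    // Compiled only when the "jit" feature (pulled in via "unstable-features") is enabled.
    pub fn run_jit() {
        println!("running in JIT mode");
    }
}

#[cfg(not(feature = "jit"))]
mod jit {
    // Fallback when the backend was built without JIT support.
    pub fn run_jit() {
        eprintln!("JIT support was not compiled in; rebuild with --features unstable-features");
    }
}
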
diff --git a/compiler/rustc_codegen_cranelift/build_sysroot/Cargo.lock b/compiler/rustc_codegen_cranelift/build_sysroot/Cargo.lock
index 46f661107e7..e068f084234 100644
--- a/compiler/rustc_codegen_cranelift/build_sysroot/Cargo.lock
+++ b/compiler/rustc_codegen_cranelift/build_sysroot/Cargo.lock
@@ -40,9 +40,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
 
 [[package]]
 name = "cc"
-version = "1.0.68"
+version = "1.0.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787"
+checksum = "e70cc2f62c6ce1868963827bd677764c62d07c3d9a3e1fb1177ee1a9ab199eb2"
 
 [[package]]
 name = "cfg-if"
@@ -132,9 +132,9 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.97"
+version = "0.2.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6"
+checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790"
 dependencies = [
  "rustc-std-workspace-core",
 ]
@@ -271,14 +271,6 @@ dependencies = [
 ]
 
 [[package]]
-name = "term"
-version = "0.0.0"
-dependencies = [
- "core",
- "std",
-]
-
-[[package]]
 name = "test"
 version = "0.0.0"
 dependencies = [
@@ -290,7 +282,6 @@ dependencies = [
  "panic_unwind",
  "proc_macro",
  "std",
- "term",
 ]
 
 [[package]]
diff --git a/compiler/rustc_codegen_cranelift/build_system/build_backend.rs b/compiler/rustc_codegen_cranelift/build_system/build_backend.rs
index 1df2bcc4541..150b6d01a6b 100644
--- a/compiler/rustc_codegen_cranelift/build_system/build_backend.rs
+++ b/compiler/rustc_codegen_cranelift/build_system/build_backend.rs
@@ -4,7 +4,7 @@ use std::process::Command;
 
 pub(crate) fn build_backend(channel: &str, host_triple: &str) -> PathBuf {
     let mut cmd = Command::new("cargo");
-    cmd.arg("build").arg("--target").arg(host_triple);
+    cmd.arg("build").arg("--target").arg(host_triple).arg("--features").arg("unstable-features");
 
     match channel {
         "debug" => {}
diff --git a/compiler/rustc_codegen_cranelift/build_system/build_sysroot.rs b/compiler/rustc_codegen_cranelift/build_system/build_sysroot.rs
index 9fb88c27961..642abc41f45 100644
--- a/compiler/rustc_codegen_cranelift/build_system/build_sysroot.rs
+++ b/compiler/rustc_codegen_cranelift/build_system/build_sysroot.rs
@@ -91,7 +91,9 @@ pub(crate) fn build_sysroot(
             {
                 let file = file.unwrap().path();
                 let file_name_str = file.file_name().unwrap().to_str().unwrap();
-                if file_name_str.contains("rustc_")
+                if (file_name_str.contains("rustc_")
+                    && !file_name_str.contains("rustc_std_workspace_")
+                    && !file_name_str.contains("rustc_demangle"))
                     || file_name_str.contains("chalk")
                     || file_name_str.contains("tracing")
                     || file_name_str.contains("regex")
diff --git a/compiler/rustc_codegen_cranelift/build_system/prepare.rs b/compiler/rustc_codegen_cranelift/build_system/prepare.rs
index 401b8271abc..4b2051b605a 100644
--- a/compiler/rustc_codegen_cranelift/build_system/prepare.rs
+++ b/compiler/rustc_codegen_cranelift/build_system/prepare.rs
@@ -28,6 +28,13 @@ pub(crate) fn prepare() {
     );
 
     clone_repo(
+        "stdsimd",
+        "https://github.com/rust-lang/stdsimd",
+        "be96995d8ddec03fac9a0caf4d4c51c7fbc33507",
+    );
+    apply_patches("stdsimd", Path::new("stdsimd"));
+
+    clone_repo(
         "simple-raytracer",
         "https://github.com/ebobby/simple-raytracer",
         "804a7a21b9e673a482797aa289a18ed480e4d813",
@@ -60,11 +67,7 @@ fn prepare_sysroot() {
     copy_dir_recursively(&sysroot_src_orig.join("library"), &sysroot_src.join("library"));
 
     let rustc_version = get_rustc_version();
-    fs::write(
-        Path::new("build_sysroot").join("rustc_version"),
-        &rustc_version,
-    )
-    .unwrap();
+    fs::write(Path::new("build_sysroot").join("rustc_version"), &rustc_version).unwrap();
 
     eprintln!("[GIT] init");
     let mut git_init_cmd = Command::new("git");
diff --git a/compiler/rustc_codegen_cranelift/clean_all.sh b/compiler/rustc_codegen_cranelift/clean_all.sh
index f4f8c82d69f..23e5bf2e0a8 100755
--- a/compiler/rustc_codegen_cranelift/clean_all.sh
+++ b/compiler/rustc_codegen_cranelift/clean_all.sh
@@ -3,4 +3,4 @@ set -e
 
 rm -rf build_sysroot/{sysroot_src/,target/,compiler-builtins/,rustc_version}
 rm -rf target/ build/ perf.data{,.old}
-rm -rf rand/ regex/ simple-raytracer/
+rm -rf rand/ regex/ simple-raytracer/ stdsimd/
diff --git a/compiler/rustc_codegen_cranelift/docs/usage.md b/compiler/rustc_codegen_cranelift/docs/usage.md
index 956d5905a97..87eec0e818b 100644
--- a/compiler/rustc_codegen_cranelift/docs/usage.md
+++ b/compiler/rustc_codegen_cranelift/docs/usage.md
@@ -36,7 +36,7 @@ $ $cg_clif_dir/build/cargo jit
 or
 
 ```bash
-$ $cg_clif_dir/build/bin/cg_clif -Cllvm-args=mode=jit -Cprefer-dynamic my_crate.rs
+$ $cg_clif_dir/build/bin/cg_clif -Zunstable-features -Cllvm-args=mode=jit -Cprefer-dynamic my_crate.rs
 ```
 
 There is also an experimental lazy jit mode. In this mode functions are only compiled once they are
@@ -52,7 +52,7 @@ These are a few functions that allow you to easily run rust code from the shell
 
 ```bash
 function jit_naked() {
-    echo "$@" | $cg_clif_dir/build/bin/cg_clif - -Cllvm-args=mode=jit -Cprefer-dynamic
+    echo "$@" | $cg_clif_dir/build/bin/cg_clif - -Zunstable-features -Cllvm-args=mode=jit -Cprefer-dynamic
 }
 
 function jit() {
diff --git a/compiler/rustc_codegen_cranelift/patches/0001-stdsimd-Disable-unsupported-tests.patch b/compiler/rustc_codegen_cranelift/patches/0001-stdsimd-Disable-unsupported-tests.patch
new file mode 100644
index 00000000000..731c60fda58
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/patches/0001-stdsimd-Disable-unsupported-tests.patch
@@ -0,0 +1,165 @@
+From 6bfce5dc2cbf834c74dbccb7538adc08c6eb57e7 Mon Sep 17 00:00:00 2001
+From: bjorn3 <bjorn3@users.noreply.github.com>
+Date: Sun, 25 Jul 2021 18:39:31 +0200
+Subject: [PATCH] Disable unsupported tests
+
+---
+ crates/core_simd/src/array.rs        |  2 ++
+ crates/core_simd/src/lib.rs          |  2 +-
+ crates/core_simd/src/math.rs         |  4 ++++
+ crates/core_simd/tests/masks.rs      | 12 ------------
+ crates/core_simd/tests/ops_macros.rs |  6 ++++++
+ crates/core_simd/tests/round.rs      |  2 ++
+ 6 files changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/crates/core_simd/src/array.rs b/crates/core_simd/src/array.rs
+index 25c5309..2b3d819 100644
+--- a/crates/core_simd/src/array.rs
++++ b/crates/core_simd/src/array.rs
+@@ -22,6 +22,7 @@ where
+     #[must_use]
+     fn splat(val: Self::Scalar) -> Self;
+ 
++    /*
+     /// SIMD gather: construct a SIMD vector by reading from a slice, using potentially discontiguous indices.
+     /// If an index is out of bounds, that lane instead selects the value from the "or" vector.
+     /// ```
+@@ -150,6 +151,7 @@ where
+             // Cleared ☢️ *mut T Zone
+         }
+     }
++    */
+ }
+ 
+ macro_rules! impl_simdarray_for {
+diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
+index a64904d..299eb11 100644
+--- a/crates/core_simd/src/lib.rs
++++ b/crates/core_simd/src/lib.rs
+@@ -1,7 +1,7 @@
+ #![no_std]
+ #![allow(incomplete_features)]
+ #![feature(
+-    const_generics, 
++    const_generics,
+     platform_intrinsics,
+     repr_simd,
+     simd_ffi,
+diff --git a/crates/core_simd/src/math.rs b/crates/core_simd/src/math.rs
+index 7290a28..e394730 100644
+--- a/crates/core_simd/src/math.rs
++++ b/crates/core_simd/src/math.rs
+@@ -2,6 +2,7 @@ macro_rules! impl_uint_arith {
+     ($(($name:ident, $n:ident)),+) => {
+         $( impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
+ 
++            /*
+             /// Lanewise saturating add.
+             ///
+             /// # Examples
+@@ -38,6 +39,7 @@ macro_rules! impl_uint_arith {
+             pub fn saturating_sub(self, second: Self) -> Self {
+                 unsafe { crate::intrinsics::simd_saturating_sub(self, second) }
+             }
++            */
+         })+
+     }
+ }
+@@ -46,6 +48,7 @@ macro_rules! impl_int_arith {
+     ($(($name:ident, $n:ident)),+) => {
+         $( impl<const LANES: usize> $name<LANES> where Self: crate::LanesAtMost32 {
+ 
++            /*
+             /// Lanewise saturating add.
+             ///
+             /// # Examples
+@@ -141,6 +144,7 @@ macro_rules! impl_int_arith {
+             pub fn saturating_neg(self) -> Self {
+                 Self::splat(0).saturating_sub(self)
+             }
++            */
+         })+
+     }
+ }
+diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
+index 61d8e44..2bccae2 100644
+--- a/crates/core_simd/tests/masks.rs
++++ b/crates/core_simd/tests/masks.rs
+@@ -67,18 +67,6 @@ macro_rules! test_mask_api {
+                 assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]);
+                 assert_eq!(core_simd::$name::<8>::from_int(int), mask);
+             }
+-
+-            #[test]
+-            fn roundtrip_bitmask_conversion() {
+-                let values = [
+-                    true, false, false, true, false, false, true, false,
+-                    true, true, false, false, false, false, false, true,
+-                ];
+-                let mask = core_simd::$name::<16>::from_array(values);
+-                let bitmask = mask.to_bitmask();
+-                assert_eq!(bitmask, [0b01001001, 0b10000011]);
+-                assert_eq!(core_simd::$name::<16>::from_bitmask(bitmask), mask);
+-            }
+         }
+     }
+ }
+diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
+index cb39e73..fc0ebe1 100644
+--- a/crates/core_simd/tests/ops_macros.rs
++++ b/crates/core_simd/tests/ops_macros.rs
+@@ -435,6 +435,7 @@ macro_rules! impl_float_tests {
+                     )
+                 }
+ 
++                /*
+                 fn mul_add<const LANES: usize>() {
+                     test_helpers::test_ternary_elementwise(
+                         &Vector::<LANES>::mul_add,
+@@ -442,6 +443,7 @@ macro_rules! impl_float_tests {
+                         &|_, _, _| true,
+                     )
+                 }
++                */
+ 
+                 fn sqrt<const LANES: usize>() {
+                     test_helpers::test_unary_elementwise(
+@@ -581,6 +585,7 @@ macro_rules! impl_float_tests {
+                     });
+                 }
+ 
++                /*
+                 fn horizontal_max<const LANES: usize>() {
+                     test_helpers::test_1(&|x| {
+                         let vmax = Vector::<LANES>::from_array(x).horizontal_max();
+@@ -604,6 +609,7 @@ macro_rules! impl_float_tests {
+                         Ok(())
+                     });
+                 }
++                */
+             }
+         }
+     }
+diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs
+index 37044a7..4cdc6b7 100644
+--- a/crates/core_simd/tests/round.rs
++++ b/crates/core_simd/tests/round.rs
+@@ -25,6 +25,7 @@ macro_rules! float_rounding_test {
+                     )
+                 }
+ 
++                /*
+                 fn round<const LANES: usize>() {
+                     test_helpers::test_unary_elementwise(
+                         &Vector::<LANES>::round,
+@@ -32,6 +33,7 @@ macro_rules! float_rounding_test {
+                         &|_| true,
+                     )
+                 }
++                */
+ 
+                 fn trunc<const LANES: usize>() {
+                     test_helpers::test_unary_elementwise(
+-- 
+2.26.2.7.g19db9cfb68
+
diff --git a/compiler/rustc_codegen_cranelift/patches/0022-sysroot-Disable-not-compiling-tests.patch b/compiler/rustc_codegen_cranelift/patches/0022-sysroot-Disable-not-compiling-tests.patch
index ba0eaacd828..25a315f666e 100644
--- a/compiler/rustc_codegen_cranelift/patches/0022-sysroot-Disable-not-compiling-tests.patch
+++ b/compiler/rustc_codegen_cranelift/patches/0022-sysroot-Disable-not-compiling-tests.patch
@@ -51,14 +51,14 @@ index 1a6be3a..42dbd59 100644
  #[test]
  #[allow(warnings)]
  // Have a symbol for the test below. It doesn’t need to be an actual variadic function, match the
-@@ -289,6 +290,7 @@ fn write_unaligned_drop() {
-     }
-     DROPS.with(|d| assert_eq!(*d.borrow(), [0]));
+@@ -277,6 +277,7 @@ pub fn test_variadic_fnptr() {
+     let mut s = SipHasher::new();
+     assert_eq!(p.hash(&mut s), q.hash(&mut s));
  }
 +*/
  
  #[test]
- fn align_offset_zst() {
+ fn write_unaligned_drop() {
 diff --git a/library/core/tests/slice.rs b/library/core/tests/slice.rs
 index 6609bc3..241b497 100644
 --- a/library/core/tests/slice.rs
diff --git a/compiler/rustc_codegen_cranelift/patches/0023-sysroot-Ignore-failing-tests.patch b/compiler/rustc_codegen_cranelift/patches/0023-sysroot-Ignore-failing-tests.patch
index 5d2c3049f60..50ef0bd9418 100644
--- a/compiler/rustc_codegen_cranelift/patches/0023-sysroot-Ignore-failing-tests.patch
+++ b/compiler/rustc_codegen_cranelift/patches/0023-sysroot-Ignore-failing-tests.patch
@@ -46,45 +46,5 @@ index 4bc44e9..8e3c7a4 100644
  
  #[test]
  fn cell_allows_array_cycle() {
-diff --git a/library/core/tests/num/mod.rs b/library/core/tests/num/mod.rs
-index a17c094..5bb11d2 100644
---- a/library/core/tests/num/mod.rs
-+++ b/library/core/tests/num/mod.rs
-@@ -651,11 +651,12 @@ macro_rules! test_float {
-                 assert_eq!((9.0 as $fty).min($neginf), $neginf);
-                 assert_eq!(($neginf as $fty).min(-9.0), $neginf);
-                 assert_eq!((-9.0 as $fty).min($neginf), $neginf);
--                assert_eq!(($nan as $fty).min(9.0), 9.0);
--                assert_eq!(($nan as $fty).min(-9.0), -9.0);
--                assert_eq!((9.0 as $fty).min($nan), 9.0);
--                assert_eq!((-9.0 as $fty).min($nan), -9.0);
--                assert!(($nan as $fty).min($nan).is_nan());
-+                // Cranelift fmin has NaN propagation
-+                //assert_eq!(($nan as $fty).min(9.0), 9.0);
-+                //assert_eq!(($nan as $fty).min(-9.0), -9.0);
-+                //assert_eq!((9.0 as $fty).min($nan), 9.0);
-+                //assert_eq!((-9.0 as $fty).min($nan), -9.0);
-+                //assert!(($nan as $fty).min($nan).is_nan());
-             }
-             #[test]
-             fn max() {
-@@ -673,11 +674,12 @@ macro_rules! test_float {
-                 assert_eq!((9.0 as $fty).max($neginf), 9.0);
-                 assert_eq!(($neginf as $fty).max(-9.0), -9.0);
-                 assert_eq!((-9.0 as $fty).max($neginf), -9.0);
--                assert_eq!(($nan as $fty).max(9.0), 9.0);
--                assert_eq!(($nan as $fty).max(-9.0), -9.0);
--                assert_eq!((9.0 as $fty).max($nan), 9.0);
--                assert_eq!((-9.0 as $fty).max($nan), -9.0);
--                assert!(($nan as $fty).max($nan).is_nan());
-+                // Cranelift fmax has NaN propagation
-+                //assert_eq!(($nan as $fty).max(9.0), 9.0);
-+                //assert_eq!(($nan as $fty).max(-9.0), -9.0);
-+                //assert_eq!((9.0 as $fty).max($nan), 9.0);
-+                //assert_eq!((-9.0 as $fty).max($nan), -9.0);
-+                //assert!(($nan as $fty).max($nan).is_nan());
-             }
-             #[test]
-             fn rem_euclid() {
 -- 
 2.21.0 (Apple Git-122)
diff --git a/compiler/rustc_codegen_cranelift/patches/0027-sysroot-128bit-atomic-operations.patch b/compiler/rustc_codegen_cranelift/patches/0027-sysroot-128bit-atomic-operations.patch
index 32e59309690..cda8153083c 100644
--- a/compiler/rustc_codegen_cranelift/patches/0027-sysroot-128bit-atomic-operations.patch
+++ b/compiler/rustc_codegen_cranelift/patches/0027-sysroot-128bit-atomic-operations.patch
@@ -1,20 +1,44 @@
-From 894e07dfec2624ba539129b1c1d63e1d7d812bda Mon Sep 17 00:00:00 2001
+From 6a4e6f5dc8c8a529a822eb9b57f9e57519595439 Mon Sep 17 00:00:00 2001
 From: bjorn3 <bjorn3@users.noreply.github.com>
 Date: Thu, 18 Feb 2021 18:45:28 +0100
 Subject: [PATCH] Disable 128bit atomic operations
 
 Cranelift doesn't support them yet
 ---
- library/core/src/sync/atomic.rs | 38 ---------------------------------
- library/core/tests/atomic.rs    |  4 ----
- library/std/src/panic.rs        |  6 ------
+ library/core/src/panic/unwind_safe.rs |  6 -----
+ library/core/src/sync/atomic.rs       | 38 ---------------------------
+ library/core/tests/atomic.rs          |  4 ---
  3 files changed, 48 deletions(-)
 
+diff --git a/library/core/src/panic/unwind_safe.rs b/library/core/src/panic/unwind_safe.rs
+index 092b7cf..158cf71 100644
+--- a/library/core/src/panic/unwind_safe.rs
++++ b/library/core/src/panic/unwind_safe.rs
+@@ -216,9 +216,6 @@ impl RefUnwindSafe for crate::sync::atomic::AtomicI32 {}
+ #[cfg(target_has_atomic_load_store = "64")]
+ #[stable(feature = "integer_atomics_stable", since = "1.34.0")]
+ impl RefUnwindSafe for crate::sync::atomic::AtomicI64 {}
+-#[cfg(target_has_atomic_load_store = "128")]
+-#[unstable(feature = "integer_atomics", issue = "32976")]
+-impl RefUnwindSafe for crate::sync::atomic::AtomicI128 {}
+ 
+ #[cfg(target_has_atomic_load_store = "ptr")]
+ #[stable(feature = "unwind_safe_atomic_refs", since = "1.14.0")]
+@@ -235,9 +232,6 @@ impl RefUnwindSafe for crate::sync::atomic::AtomicU32 {}
+ #[cfg(target_has_atomic_load_store = "64")]
+ #[stable(feature = "integer_atomics_stable", since = "1.34.0")]
+ impl RefUnwindSafe for crate::sync::atomic::AtomicU64 {}
+-#[cfg(target_has_atomic_load_store = "128")]
+-#[unstable(feature = "integer_atomics", issue = "32976")]
+-impl RefUnwindSafe for crate::sync::atomic::AtomicU128 {}
+ 
+ #[cfg(target_has_atomic_load_store = "8")]
+ #[stable(feature = "unwind_safe_atomic_refs", since = "1.14.0")]
 diff --git a/library/core/src/sync/atomic.rs b/library/core/src/sync/atomic.rs
-index 81c9e1d..65c9503 100644
+index 0194c58..25a0038 100644
 --- a/library/core/src/sync/atomic.rs
 +++ b/library/core/src/sync/atomic.rs
-@@ -2228,44 +2228,6 @@ atomic_int! {
+@@ -2229,44 +2229,6 @@ atomic_int! {
      "AtomicU64::new(0)",
      u64 AtomicU64 ATOMIC_U64_INIT
  }
@@ -60,10 +84,10 @@ index 81c9e1d..65c9503 100644
  macro_rules! atomic_int_ptr_sized {
      ( $($target_pointer_width:literal $align:literal)* ) => { $(
 diff --git a/library/core/tests/atomic.rs b/library/core/tests/atomic.rs
-index 2d1e449..cb6da5d 100644
+index b735957..ea728b6 100644
 --- a/library/core/tests/atomic.rs
 +++ b/library/core/tests/atomic.rs
-@@ -145,10 +145,6 @@ fn atomic_alignment() {
+@@ -185,10 +185,6 @@ fn atomic_alignment() {
      assert_eq!(align_of::<AtomicU64>(), size_of::<AtomicU64>());
      #[cfg(target_has_atomic = "64")]
      assert_eq!(align_of::<AtomicI64>(), size_of::<AtomicI64>());
@@ -74,30 +98,6 @@ index 2d1e449..cb6da5d 100644
      #[cfg(target_has_atomic = "ptr")]
      assert_eq!(align_of::<AtomicUsize>(), size_of::<AtomicUsize>());
      #[cfg(target_has_atomic = "ptr")]
-diff --git a/library/std/src/panic.rs b/library/std/src/panic.rs
-index 89a822a..779fd88 100644
---- a/library/std/src/panic.rs
-+++ b/library/std/src/panic.rs
-@@ -279,9 +279,6 @@ impl RefUnwindSafe for atomic::AtomicI32 {}
- #[cfg(target_has_atomic_load_store = "64")]
- #[stable(feature = "integer_atomics_stable", since = "1.34.0")]
- impl RefUnwindSafe for atomic::AtomicI64 {}
--#[cfg(target_has_atomic_load_store = "128")]
--#[unstable(feature = "integer_atomics", issue = "32976")]
--impl RefUnwindSafe for atomic::AtomicI128 {}
- 
- #[cfg(target_has_atomic_load_store = "ptr")]
- #[stable(feature = "unwind_safe_atomic_refs", since = "1.14.0")]
-@@ -298,9 +295,6 @@ impl RefUnwindSafe for atomic::AtomicU32 {}
- #[cfg(target_has_atomic_load_store = "64")]
- #[stable(feature = "integer_atomics_stable", since = "1.34.0")]
- impl RefUnwindSafe for atomic::AtomicU64 {}
--#[cfg(target_has_atomic_load_store = "128")]
--#[unstable(feature = "integer_atomics", issue = "32976")]
--impl RefUnwindSafe for atomic::AtomicU128 {}
- 
- #[cfg(target_has_atomic_load_store = "8")]
- #[stable(feature = "unwind_safe_atomic_refs", since = "1.14.0")]
 -- 
 2.26.2.7.g19db9cfb68
 
diff --git a/compiler/rustc_codegen_cranelift/rust-toolchain b/compiler/rustc_codegen_cranelift/rust-toolchain
index f806f7bdcd9..f074ebe7a42 100644
--- a/compiler/rustc_codegen_cranelift/rust-toolchain
+++ b/compiler/rustc_codegen_cranelift/rust-toolchain
@@ -1,3 +1,3 @@
 [toolchain]
-channel = "nightly-2021-07-07"
+channel = "nightly-2021-08-05"
 components = ["rust-src", "rustc-dev", "llvm-tools-preview"]
diff --git a/compiler/rustc_codegen_cranelift/scripts/cargo.rs b/compiler/rustc_codegen_cranelift/scripts/cargo.rs
index b7e8dd44974..89ec8da77d3 100644
--- a/compiler/rustc_codegen_cranelift/scripts/cargo.rs
+++ b/compiler/rustc_codegen_cranelift/scripts/cargo.rs
@@ -44,7 +44,11 @@ fn main() {
             );
             std::array::IntoIter::new(["rustc".to_string()])
                 .chain(env::args().skip(2))
-                .chain(["--".to_string(), "-Cllvm-args=mode=jit".to_string()])
+                .chain([
+                    "--".to_string(),
+                    "-Zunstable-features".to_string(),
+                    "-Cllvm-args=mode=jit".to_string(),
+                ])
                 .collect()
         }
         Some("lazy-jit") => {
@@ -54,7 +58,11 @@ fn main() {
             );
             std::array::IntoIter::new(["rustc".to_string()])
                 .chain(env::args().skip(2))
-                .chain(["--".to_string(), "-Cllvm-args=mode=jit-lazy".to_string()])
+                .chain([
+                    "--".to_string(),
+                    "-Zunstable-features".to_string(),
+                    "-Cllvm-args=mode=jit-lazy".to_string(),
+                ])
                 .collect()
         }
         _ => env::args().skip(1).collect(),
diff --git a/compiler/rustc_codegen_cranelift/scripts/filter_profile.rs b/compiler/rustc_codegen_cranelift/scripts/filter_profile.rs
index 9e196afbe4f..c4801a0a87b 100755
--- a/compiler/rustc_codegen_cranelift/scripts/filter_profile.rs
+++ b/compiler/rustc_codegen_cranelift/scripts/filter_profile.rs
@@ -5,7 +5,7 @@ pushd $(dirname "$0")/../
 source scripts/config.sh
 RUSTC="$(pwd)/build/bin/cg_clif"
 popd
-PROFILE=$1 OUTPUT=$2 exec $RUSTC -Cllvm-args=mode=jit -Cprefer-dynamic $0
+PROFILE=$1 OUTPUT=$2 exec $RUSTC -Zunstable-options -Cllvm-args=mode=jit -Cprefer-dynamic $0
 #*/
 
 //! This program filters away uninteresting samples and trims uninteresting frames for stackcollapse
diff --git a/compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh b/compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh
index 52adaaa8de6..ca83e7096b8 100644
--- a/compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh
+++ b/compiler/rustc_codegen_cranelift/scripts/setup_rust_fork.sh
@@ -33,7 +33,7 @@ index d95b5b7f17f..00b6f0e3635 100644
  [dependencies]
  core = { path = "../core" }
 -compiler_builtins = { version = "0.1.40", features = ['rustc-dep-of-std'] }
-+compiler_builtins = { version = "0.1.45", features = ['rustc-dep-of-std', 'no-asm'] }
++compiler_builtins = { version = "0.1.46", features = ['rustc-dep-of-std', 'no-asm'] }
 
  [dev-dependencies]
  rand = "0.7"
diff --git a/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh b/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh
index 2f5c2cf737b..0ac49dd3574 100755
--- a/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh
+++ b/compiler/rustc_codegen_cranelift/scripts/test_rustc_tests.sh
@@ -79,7 +79,6 @@ rm src/test/ui/type-alias-impl-trait/cross_crate_ice*.rs # requires removed aux
 
 rm src/test/ui/allocator/no_std-alloc-error-handler-default.rs # missing rust_oom definition
 rm src/test/ui/cfg/cfg-panic.rs
-rm src/test/ui/default-alloc-error-hook.rs
 rm -r src/test/ui/hygiene/
 
 rm -r src/test/ui/polymorphization/ # polymorphization not yet supported
diff --git a/compiler/rustc_codegen_cranelift/scripts/tests.sh b/compiler/rustc_codegen_cranelift/scripts/tests.sh
index 5df04c533a7..0eef710239b 100755
--- a/compiler/rustc_codegen_cranelift/scripts/tests.sh
+++ b/compiler/rustc_codegen_cranelift/scripts/tests.sh
@@ -16,10 +16,10 @@ function no_sysroot_tests() {
 
     if [[ "$JIT_SUPPORTED" = "1" ]]; then
         echo "[JIT] mini_core_hello_world"
-        CG_CLIF_JIT_ARGS="abc bcd" $MY_RUSTC -Cllvm-args=mode=jit -Cprefer-dynamic example/mini_core_hello_world.rs --cfg jit --target "$HOST_TRIPLE"
+        CG_CLIF_JIT_ARGS="abc bcd" $MY_RUSTC -Zunstable-options -Cllvm-args=mode=jit -Cprefer-dynamic example/mini_core_hello_world.rs --cfg jit --target "$HOST_TRIPLE"
 
         echo "[JIT-lazy] mini_core_hello_world"
-        CG_CLIF_JIT_ARGS="abc bcd" $MY_RUSTC -Cllvm-args=mode=jit-lazy -Cprefer-dynamic example/mini_core_hello_world.rs --cfg jit --target "$HOST_TRIPLE"
+        CG_CLIF_JIT_ARGS="abc bcd" $MY_RUSTC -Zunstable-options -Cllvm-args=mode=jit-lazy -Cprefer-dynamic example/mini_core_hello_world.rs --cfg jit --target "$HOST_TRIPLE"
     else
         echo "[JIT] mini_core_hello_world (skipped)"
     fi
@@ -44,10 +44,10 @@ function base_sysroot_tests() {
 
     if [[ "$JIT_SUPPORTED" = "1" ]]; then
         echo "[JIT] std_example"
-        $MY_RUSTC -Cllvm-args=mode=jit -Cprefer-dynamic example/std_example.rs --target "$HOST_TRIPLE"
+        $MY_RUSTC -Zunstable-options -Cllvm-args=mode=jit -Cprefer-dynamic example/std_example.rs --target "$HOST_TRIPLE"
 
         echo "[JIT-lazy] std_example"
-        $MY_RUSTC -Cllvm-args=mode=jit-lazy -Cprefer-dynamic example/std_example.rs --target "$HOST_TRIPLE"
+        $MY_RUSTC -Zunstable-options -Cllvm-args=mode=jit-lazy -Cprefer-dynamic example/std_example.rs --target "$HOST_TRIPLE"
     else
         echo "[JIT] std_example (skipped)"
     fi
@@ -136,6 +136,15 @@ function extended_sysroot_tests() {
         ../build/cargo build --tests --target $TARGET_TRIPLE
     fi
     popd
+
+    pushd stdsimd
+    echo "[TEST] rust-lang/stdsimd"
+    ../build/cargo clean
+    ../build/cargo build --all-targets --target $TARGET_TRIPLE
+    if [[ "$HOST_TRIPLE" = "$TARGET_TRIPLE" ]]; then
+        ../build/cargo test -q
+    fi
+    popd
 }
 
 case "$1" in
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
index 54c8fb0e7b8..13790409e59 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -9,13 +9,12 @@ use rustc_middle::ty::layout::FnAbiExt;
 use rustc_target::abi::call::{Conv, FnAbi};
 use rustc_target::spec::abi::Abi;
 
-use cranelift_codegen::ir::AbiParam;
-use smallvec::smallvec;
+use cranelift_codegen::ir::{AbiParam, SigRef};
 
 use self::pass_mode::*;
 use crate::prelude::*;
 
-pub(crate) use self::returning::{can_return_to_ssa_var, codegen_return};
+pub(crate) use self::returning::codegen_return;
 
 fn clif_sig_from_fn_abi<'tcx>(
     tcx: TyCtxt<'tcx>,
@@ -236,27 +235,20 @@ pub(crate) fn codegen_fn_prelude<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, start_
         // not mutated by the current function, this is necessary to support unsized arguments.
         if let ArgKind::Normal(Some(val)) = arg_kind {
             if let Some((addr, meta)) = val.try_to_ptr() {
-                let local_decl = &fx.mir.local_decls[local];
-                //                       v this ! is important
-                let internally_mutable = !val
-                    .layout()
-                    .ty
-                    .is_freeze(fx.tcx.at(local_decl.source_info.span), ParamEnv::reveal_all());
-                if local_decl.mutability == mir::Mutability::Not && !internally_mutable {
-                    // We wont mutate this argument, so it is fine to borrow the backing storage
-                    // of this argument, to prevent a copy.
-
-                    let place = if let Some(meta) = meta {
-                        CPlace::for_ptr_with_extra(addr, meta, val.layout())
-                    } else {
-                        CPlace::for_ptr(addr, val.layout())
-                    };
-
-                    self::comments::add_local_place_comments(fx, place, local);
-
-                    assert_eq!(fx.local_map.push(place), local);
-                    continue;
-                }
+                // Ownership of the value at the backing storage for an argument is passed to the
+                // callee per the ABI, so it is fine to borrow the backing storage of this argument
+                // to prevent a copy.
+
+                let place = if let Some(meta) = meta {
+                    CPlace::for_ptr_with_extra(addr, meta, val.layout())
+                } else {
+                    CPlace::for_ptr(addr, val.layout())
+                };
+
+                self::comments::add_local_place_comments(fx, place, local);
+
+                assert_eq!(fx.local_map.push(place), local);
+                continue;
             }
         }
 
@@ -292,6 +284,22 @@ pub(crate) fn codegen_fn_prelude<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, start_
     fx.bcx.ins().jump(*fx.block_map.get(START_BLOCK).unwrap(), &[]);
 }
 
+struct CallArgument<'tcx> {
+    value: CValue<'tcx>,
+    is_owned: bool,
+}
+
+// FIXME avoid intermediate `CValue` before calling `adjust_arg_for_abi`
+fn codegen_call_argument_operand<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    operand: &Operand<'tcx>,
+) -> CallArgument<'tcx> {
+    CallArgument {
+        value: codegen_operand(fx, operand),
+        is_owned: matches!(operand, Operand::Move(_)),
+    }
+}
+
 pub(crate) fn codegen_terminator_call<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     span: Span,
@@ -360,12 +368,12 @@ pub(crate) fn codegen_terminator_call<'tcx>(
     }
 
     // Unpack arguments tuple for closures
-    let args = if fn_sig.abi == Abi::RustCall {
+    let mut args = if fn_sig.abi == Abi::RustCall {
         assert_eq!(args.len(), 2, "rust-call abi requires two arguments");
-        let self_arg = codegen_operand(fx, &args[0]);
-        let pack_arg = codegen_operand(fx, &args[1]);
+        let self_arg = codegen_call_argument_operand(fx, &args[0]);
+        let pack_arg = codegen_call_argument_operand(fx, &args[1]);
 
-        let tupled_arguments = match pack_arg.layout().ty.kind() {
+        let tupled_arguments = match pack_arg.value.layout().ty.kind() {
             ty::Tuple(ref tupled_arguments) => tupled_arguments,
             _ => bug!("argument to function with \"rust-call\" ABI is not a tuple"),
         };
@@ -373,37 +381,53 @@ pub(crate) fn codegen_terminator_call<'tcx>(
         let mut args = Vec::with_capacity(1 + tupled_arguments.len());
         args.push(self_arg);
         for i in 0..tupled_arguments.len() {
-            args.push(pack_arg.value_field(fx, mir::Field::new(i)));
+            args.push(CallArgument {
+                value: pack_arg.value.value_field(fx, mir::Field::new(i)),
+                is_owned: pack_arg.is_owned,
+            });
         }
         args
     } else {
-        args.iter().map(|arg| codegen_operand(fx, arg)).collect::<Vec<_>>()
+        args.iter().map(|arg| codegen_call_argument_operand(fx, arg)).collect::<Vec<_>>()
     };
 
-    //   | indirect call target
-    //   |         | the first argument to be passed
-    //   v         v
-    let (func_ref, first_arg) = match instance {
+    // Pass the caller location for `#[track_caller]`.
+    if instance.map(|inst| inst.def.requires_caller_location(fx.tcx)).unwrap_or(false) {
+        let caller_location = fx.get_caller_location(span);
+        args.push(CallArgument { value: caller_location, is_owned: false });
+    }
+
+    let args = args;
+    assert_eq!(fn_abi.args.len(), args.len());
+
+    enum CallTarget {
+        Direct(FuncRef),
+        Indirect(SigRef, Value),
+    }
+
+    let (func_ref, first_arg_override) = match instance {
         // Trait object call
         Some(Instance { def: InstanceDef::Virtual(_, idx), .. }) => {
             if fx.clif_comments.enabled() {
                 let nop_inst = fx.bcx.ins().nop();
                 fx.add_comment(
                     nop_inst,
-                    format!("virtual call; self arg pass mode: {:?}", &fn_abi.args[0],),
+                    format!("virtual call; self arg pass mode: {:?}", &fn_abi.args[0]),
                 );
             }
-            let (ptr, method) = crate::vtable::get_ptr_and_method_ref(fx, args[0], idx);
-            (Some(method), smallvec![ptr])
+
+            let (ptr, method) = crate::vtable::get_ptr_and_method_ref(fx, args[0].value, idx);
+            let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
+            let sig = fx.bcx.import_signature(sig);
+
+            (CallTarget::Indirect(sig, method), Some(ptr))
         }
 
         // Normal call
-        Some(_) => (
-            None,
-            args.get(0)
-                .map(|arg| adjust_arg_for_abi(fx, *arg, &fn_abi.args[0]))
-                .unwrap_or(smallvec![]),
-        ),
+        Some(instance) => {
+            let func_ref = fx.get_function_ref(instance);
+            (CallTarget::Direct(func_ref), None)
+        }
 
         // Indirect call
         None => {
@@ -411,80 +435,64 @@ pub(crate) fn codegen_terminator_call<'tcx>(
                 let nop_inst = fx.bcx.ins().nop();
                 fx.add_comment(nop_inst, "indirect call");
             }
+
             let func = codegen_operand(fx, func).load_scalar(fx);
-            (
-                Some(func),
-                args.get(0)
-                    .map(|arg| adjust_arg_for_abi(fx, *arg, &fn_abi.args[0]))
-                    .unwrap_or(smallvec![]),
-            )
+            let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
+            let sig = fx.bcx.import_signature(sig);
+
+            (CallTarget::Indirect(sig, func), None)
         }
     };
 
     let ret_place = destination.map(|(place, _)| place);
-    let (call_inst, call_args) = self::returning::codegen_with_call_return_arg(
-        fx,
-        &fn_abi.ret,
-        ret_place,
-        |fx, return_ptr| {
-            let regular_args_count = args.len();
-            let mut call_args: Vec<Value> = return_ptr
-                .into_iter()
-                .chain(first_arg.into_iter())
-                .chain(
-                    args.into_iter()
-                        .enumerate()
-                        .skip(1)
-                        .map(|(i, arg)| adjust_arg_for_abi(fx, arg, &fn_abi.args[i]).into_iter())
-                        .flatten(),
-                )
-                .collect::<Vec<_>>();
-
-            if instance.map(|inst| inst.def.requires_caller_location(fx.tcx)).unwrap_or(false) {
-                // Pass the caller location for `#[track_caller]`.
-                let caller_location = fx.get_caller_location(span);
-                call_args.extend(
-                    adjust_arg_for_abi(fx, caller_location, &fn_abi.args[regular_args_count])
-                        .into_iter(),
-                );
-                assert_eq!(fn_abi.args.len(), regular_args_count + 1);
-            } else {
-                assert_eq!(fn_abi.args.len(), regular_args_count);
+    self::returning::codegen_with_call_return_arg(fx, &fn_abi.ret, ret_place, |fx, return_ptr| {
+        let call_args = return_ptr
+            .into_iter()
+            .chain(first_arg_override.into_iter())
+            .chain(
+                args.into_iter()
+                    .enumerate()
+                    .skip(if first_arg_override.is_some() { 1 } else { 0 })
+                    .map(|(i, arg)| {
+                        adjust_arg_for_abi(fx, arg.value, &fn_abi.args[i], arg.is_owned).into_iter()
+                    })
+                    .flatten(),
+            )
+            .collect::<Vec<Value>>();
+
+        let call_inst = match func_ref {
+            CallTarget::Direct(func_ref) => fx.bcx.ins().call(func_ref, &call_args),
+            CallTarget::Indirect(sig, func_ptr) => {
+                fx.bcx.ins().call_indirect(sig, func_ptr, &call_args)
             }
+        };
 
-            let call_inst = if let Some(func_ref) = func_ref {
-                let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
-                let sig = fx.bcx.import_signature(sig);
-                fx.bcx.ins().call_indirect(sig, func_ref, &call_args)
-            } else {
-                let func_ref =
-                    fx.get_function_ref(instance.expect("non-indirect call on non-FnDef type"));
-                fx.bcx.ins().call(func_ref, &call_args)
-            };
-
-            (call_inst, call_args)
-        },
-    );
-
-    // FIXME find a cleaner way to support varargs
-    if fn_sig.c_variadic {
-        if !matches!(fn_sig.abi, Abi::C { .. }) {
-            fx.tcx.sess.span_fatal(span, &format!("Variadic call for non-C abi {:?}", fn_sig.abi));
+        // FIXME find a cleaner way to support varargs
+        if fn_sig.c_variadic {
+            if !matches!(fn_sig.abi, Abi::C { .. }) {
+                fx.tcx
+                    .sess
+                    .span_fatal(span, &format!("Variadic call for non-C abi {:?}", fn_sig.abi));
+            }
+            let sig_ref = fx.bcx.func.dfg.call_signature(call_inst).unwrap();
+            let abi_params = call_args
+                .into_iter()
+                .map(|arg| {
+                    let ty = fx.bcx.func.dfg.value_type(arg);
+                    if !ty.is_int() {
+                        // FIXME set %al to upperbound on float args once floats are supported
+                        fx.tcx
+                            .sess
+                            .span_fatal(span, &format!("Non int ty {:?} for variadic call", ty));
+                    }
+                    AbiParam::new(ty)
+                })
+                .collect::<Vec<AbiParam>>();
+            fx.bcx.func.dfg.signatures[sig_ref].params = abi_params;
         }
-        let sig_ref = fx.bcx.func.dfg.call_signature(call_inst).unwrap();
-        let abi_params = call_args
-            .into_iter()
-            .map(|arg| {
-                let ty = fx.bcx.func.dfg.value_type(arg);
-                if !ty.is_int() {
-                    // FIXME set %al to upperbound on float args once floats are supported
-                    fx.tcx.sess.span_fatal(span, &format!("Non int ty {:?} for variadic call", ty));
-                }
-                AbiParam::new(ty)
-            })
-            .collect::<Vec<AbiParam>>();
-        fx.bcx.func.dfg.signatures[sig_ref].params = abi_params;
-    }
+
+        call_inst
+    });
 
     if let Some((_, dest)) = destination {
         let ret_block = fx.get_block(dest);
@@ -535,7 +543,7 @@ pub(crate) fn codegen_drop<'tcx>(
                         TypeAndMut { ty, mutbl: crate::rustc_hir::Mutability::Mut },
                     )),
                 );
-                let arg_value = adjust_arg_for_abi(fx, arg_value, &fn_abi.args[0]);
+                let arg_value = adjust_arg_for_abi(fx, arg_value, &fn_abi.args[0], true);
 
                 let mut call_args: Vec<Value> = arg_value.into_iter().collect::<Vec<_>>();
 
@@ -543,7 +551,7 @@ pub(crate) fn codegen_drop<'tcx>(
                     // Pass the caller location for `#[track_caller]`.
                     let caller_location = fx.get_caller_location(span);
                     call_args.extend(
-                        adjust_arg_for_abi(fx, caller_location, &fn_abi.args[1]).into_iter(),
+                        adjust_arg_for_abi(fx, caller_location, &fn_abi.args[1], false).into_iter(),
                     );
                 }
 
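To make the restructuring of codegen_terminator_call above easier to follow, here is a minimal standalone sketch of the new call-target split, using hypothetical stand-ins for Cranelift's FuncRef/SigRef/Value handles (an illustration of the pattern, not the compiler's actual code):

#[derive(Clone, Copy, Debug)]
struct FuncRef(u32);
#[derive(Clone, Copy, Debug)]
struct SigRef(u32);
#[derive(Clone, Copy, Debug)]
struct Value(u32);

// Statically known callees become direct calls; virtual calls and fn-pointer
// calls both take the indirect path through an imported signature.
enum CallTarget {
    Direct(FuncRef),
    Indirect(SigRef, Value),
}

fn emit_call(target: &CallTarget, call_args: &[Value]) {
    match target {
        // corresponds to fx.bcx.ins().call(func_ref, &call_args)
        CallTarget::Direct(func_ref) => {
            println!("call {:?} with {} args", func_ref, call_args.len());
        }
        // corresponds to fx.bcx.ins().call_indirect(sig, func_ptr, &call_args)
        CallTarget::Indirect(sig, func_ptr) => {
            println!("call_indirect {:?} through {:?} with {} args", sig, func_ptr, call_args.len());
        }
    }
}

Collapsing the previous Option-based juggling of func_ref and first_arg into this enum is what lets the virtual, direct, and fn-pointer cases share a single argument-lowering loop.
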
diff --git a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
index 7c275965199..44eae706ea8 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
@@ -227,6 +227,7 @@ pub(super) fn adjust_arg_for_abi<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     arg: CValue<'tcx>,
     arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
+    is_owned: bool,
 ) -> SmallVec<[Value; 2]> {
     assert_assignable(fx, arg.layout().ty, arg_abi.layout.ty);
     match arg_abi.mode {
@@ -237,10 +238,21 @@ pub(super) fn adjust_arg_for_abi<'tcx>(
             smallvec![a, b]
         }
         PassMode::Cast(cast) => to_casted_value(fx, arg, cast),
-        PassMode::Indirect { .. } => match arg.force_stack(fx) {
-            (ptr, None) => smallvec![ptr.get_addr(fx)],
-            (ptr, Some(meta)) => smallvec![ptr.get_addr(fx), meta],
-        },
+        PassMode::Indirect { .. } => {
+            if is_owned {
+                match arg.force_stack(fx) {
+                    (ptr, None) => smallvec![ptr.get_addr(fx)],
+                    (ptr, Some(meta)) => smallvec![ptr.get_addr(fx), meta],
+                }
+            } else {
+                // Ownership of the value at the backing storage for an argument is passed to the
+                // callee per the ABI, so we must make a copy of the argument unless the argument
+                // local is moved.
+                let place = CPlace::new_stack_slot(fx, arg.layout());
+                place.write_cvalue(fx, arg);
+                smallvec![place.to_ptr().get_addr(fx)]
+            }
+        }
     }
 }
 
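The is_owned flag threaded through adjust_arg_for_abi above encodes a simple rule for indirectly passed arguments. A rough sketch of that decision, with hypothetical types standing in for cg_clif's CValue/CPlace machinery:

// Hypothetical model: `backing_addr` stands in for the address returned by
// CValue::force_stack, and `alloc_and_copy` for CPlace::new_stack_slot + write_cvalue.
struct Arg {
    backing_addr: usize,
}

fn indirect_arg_pointer(arg: &Arg, is_owned: bool, alloc_and_copy: impl FnOnce(&Arg) -> usize) -> usize {
    if is_owned {
        // Operand::Move: the caller gives up the value, so its existing backing
        // storage can be handed to the callee directly (ownership transfers per the ABI).
        arg.backing_addr
    } else {
        // Operand::Copy: the callee takes ownership of (and may clobber) the memory
        // it is given, so pass a fresh stack-slot copy instead.
        alloc_and_copy(arg)
    }
}

This mirrors the change in codegen_fn_prelude earlier in this diff, which now always borrows the backing storage of indirectly passed arguments, since ownership of that storage has already been handed to the callee per the ABI.
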
diff --git a/compiler/rustc_codegen_cranelift/src/abi/returning.rs b/compiler/rustc_codegen_cranelift/src/abi/returning.rs
index e1c53224b4f..c1bdba43e6c 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/returning.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/returning.rs
@@ -2,54 +2,9 @@
 
 use crate::prelude::*;
 
-use rustc_middle::ty::layout::FnAbiExt;
-use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
+use rustc_target::abi::call::{ArgAbi, PassMode};
 use smallvec::{smallvec, SmallVec};
 
-/// Can the given type be returned into an ssa var or does it need to be returned on the stack.
-pub(crate) fn can_return_to_ssa_var<'tcx>(
-    fx: &FunctionCx<'_, '_, 'tcx>,
-    func: &mir::Operand<'tcx>,
-    args: &[mir::Operand<'tcx>],
-) -> bool {
-    let fn_ty = fx.monomorphize(func.ty(fx.mir, fx.tcx));
-    let fn_sig =
-        fx.tcx.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), fn_ty.fn_sig(fx.tcx));
-
-    // Handle special calls like instrinsics and empty drop glue.
-    let instance = if let ty::FnDef(def_id, substs) = *fn_ty.kind() {
-        let instance = ty::Instance::resolve(fx.tcx, ty::ParamEnv::reveal_all(), def_id, substs)
-            .unwrap()
-            .unwrap()
-            .polymorphize(fx.tcx);
-
-        match instance.def {
-            InstanceDef::Intrinsic(_) | InstanceDef::DropGlue(_, _) => {
-                return true;
-            }
-            _ => Some(instance),
-        }
-    } else {
-        None
-    };
-
-    let extra_args = &args[fn_sig.inputs().len()..];
-    let extra_args = extra_args
-        .iter()
-        .map(|op_arg| fx.monomorphize(op_arg.ty(fx.mir, fx.tcx)))
-        .collect::<Vec<_>>();
-    let fn_abi = if let Some(instance) = instance {
-        FnAbi::of_instance(&RevealAllLayoutCx(fx.tcx), instance, &extra_args)
-    } else {
-        FnAbi::of_fn_ptr(&RevealAllLayoutCx(fx.tcx), fn_ty.fn_sig(fx.tcx), &extra_args)
-    };
-    match fn_abi.ret.mode {
-        PassMode::Ignore | PassMode::Direct(_) | PassMode::Pair(_, _) => true,
-        // FIXME Make it possible to return Cast and Indirect to an ssa var.
-        PassMode::Cast(_) | PassMode::Indirect { .. } => false,
-    }
-}
-
 /// Return a place where the return value of the current function can be written to. If necessary
 /// this adds an extra parameter pointing to where the return value needs to be stored.
 pub(super) fn codegen_return_param<'tcx>(
@@ -58,8 +13,7 @@ pub(super) fn codegen_return_param<'tcx>(
     block_params_iter: &mut impl Iterator<Item = Value>,
 ) -> CPlace<'tcx> {
     let (ret_place, ret_param): (_, SmallVec<[_; 2]>) = match fx.fn_abi.as_ref().unwrap().ret.mode {
-        PassMode::Ignore => (CPlace::no_place(fx.fn_abi.as_ref().unwrap().ret.layout), smallvec![]),
-        PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast(_) => {
+        PassMode::Ignore | PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast(_) => {
             let is_ssa = ssa_analyzed[RETURN_PLACE] == crate::analyze::SsaKind::Ssa;
             (
                 super::make_local_place(
@@ -73,7 +27,7 @@ pub(super) fn codegen_return_param<'tcx>(
         }
         PassMode::Indirect { attrs: _, extra_attrs: None, on_stack: _ } => {
             let ret_param = block_params_iter.next().unwrap();
-            assert_eq!(fx.bcx.func.dfg.value_type(ret_param), pointer_ty(fx.tcx));
+            assert_eq!(fx.bcx.func.dfg.value_type(ret_param), fx.pointer_type);
             (
                 CPlace::for_ptr(Pointer::new(ret_param), fx.fn_abi.as_ref().unwrap().ret.layout),
                 smallvec![ret_param],
@@ -99,25 +53,33 @@ pub(super) fn codegen_return_param<'tcx>(
 
 /// Invokes the closure, passing a value representing the return pointer if necessary. When the
 /// closure returns, the call's return value(s), if any, are written to the correct place.
-pub(super) fn codegen_with_call_return_arg<'tcx, T>(
+pub(super) fn codegen_with_call_return_arg<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     ret_arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
     ret_place: Option<CPlace<'tcx>>,
-    f: impl FnOnce(&mut FunctionCx<'_, '_, 'tcx>, Option<Value>) -> (Inst, T),
-) -> (Inst, T) {
-    let return_ptr = match ret_arg_abi.mode {
-        PassMode::Ignore => None,
+    f: impl FnOnce(&mut FunctionCx<'_, '_, 'tcx>, Option<Value>) -> Inst,
+) {
+    let (ret_temp_place, return_ptr) = match ret_arg_abi.mode {
+        PassMode::Ignore => (None, None),
         PassMode::Indirect { attrs: _, extra_attrs: None, on_stack: _ } => match ret_place {
-            Some(ret_place) => Some(ret_place.to_ptr().get_addr(fx)),
-            None => Some(fx.bcx.ins().iconst(fx.pointer_type, 43)), // FIXME allocate temp stack slot
+            Some(ret_place) if matches!(ret_place.inner(), CPlaceInner::Addr(_, None)) => {
+                // This is an optimization to prevent unnecessary copies of the return value when
+                // the return place is already a memory place as opposed to a register.
+                // This match arm can be safely removed.
+                (None, Some(ret_place.to_ptr().get_addr(fx)))
+            }
+            _ => {
+                let place = CPlace::new_stack_slot(fx, ret_arg_abi.layout);
+                (Some(place), Some(place.to_ptr().get_addr(fx)))
+            }
         },
         PassMode::Indirect { attrs: _, extra_attrs: Some(_), on_stack: _ } => {
             unreachable!("unsized return value")
         }
-        PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast(_) => None,
+        PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast(_) => (None, None),
     };
 
-    let (call_inst, meta) = f(fx, return_ptr);
+    let call_inst = f(fx, return_ptr);
 
     match ret_arg_abi.mode {
         PassMode::Ignore => {}
@@ -150,13 +112,19 @@ pub(super) fn codegen_with_call_return_arg<'tcx, T>(
                 ret_place.write_cvalue(fx, result);
             }
         }
-        PassMode::Indirect { attrs: _, extra_attrs: None, on_stack: _ } => {}
+        PassMode::Indirect { attrs: _, extra_attrs: None, on_stack: _ } => {
+            if let (Some(ret_place), Some(ret_temp_place)) = (ret_place, ret_temp_place) {
+                // Both ret_place and ret_temp_place must be Some. If ret_place is None, this is
+                // a non-returning call. If ret_temp_place is None, it is not necessary to copy the
+                // return value.
+                let ret_temp_value = ret_temp_place.to_cvalue(fx);
+                ret_place.write_cvalue(fx, ret_temp_value);
+            }
+        }
         PassMode::Indirect { attrs: _, extra_attrs: Some(_), on_stack: _ } => {
             unreachable!("unsized return value")
         }
     }
-
-    (call_inst, meta)
 }
 
 /// Codegen a return instruction with the right return value(s) if any.
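Illustrative sketch (not part of the patch): the temporary-slot scheme used above, written as plain Rust. A return place that is already addressable memory can be handed to the callee as the return pointer directly; any other destination goes through a temporary stack slot whose contents are copied back after the call. The names below are invented for the example.

```rust
fn callee_with_sret(out: &mut [u64; 4]) {
    // The callee writes its return value through the pointer it was given.
    *out = [1, 2, 3, 4];
}

fn call_with_return_arg(ret_place_is_memory: bool) -> [u64; 4] {
    let mut ret_place = [0u64; 4];
    if ret_place_is_memory {
        // The destination already has an address: pass it straight through
        // (the `CPlaceInner::Addr(_, None)` fast path above).
        callee_with_sret(&mut ret_place);
    } else {
        // Otherwise let the callee write into a temporary and copy the result back,
        // which is what `ret_temp_place` models in `codegen_with_call_return_arg`.
        let mut ret_temp_place = [0u64; 4];
        callee_with_sret(&mut ret_temp_place);
        ret_place = ret_temp_place;
    }
    ret_place
}

fn main() {
    assert_eq!(call_with_return_arg(true), call_with_return_arg(false));
}
```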
diff --git a/compiler/rustc_codegen_cranelift/src/allocator.rs b/compiler/rustc_codegen_cranelift/src/allocator.rs
index d39486c2f10..637d30f9344 100644
--- a/compiler/rustc_codegen_cranelift/src/allocator.rs
+++ b/compiler/rustc_codegen_cranelift/src/allocator.rs
@@ -5,7 +5,6 @@ use crate::prelude::*;
 
 use cranelift_codegen::binemit::{NullStackMapSink, NullTrapSink};
 use rustc_ast::expand::allocator::{AllocatorKind, AllocatorTy, ALLOCATOR_METHODS};
-use rustc_span::symbol::sym;
 
 /// Returns whether an allocator shim was created
 pub(crate) fn codegen(
@@ -20,7 +19,7 @@ pub(crate) fn codegen(
     if any_dynamic_crate {
         false
     } else if let Some(kind) = tcx.allocator_kind(()) {
-        codegen_inner(module, unwind_context, kind);
+        codegen_inner(module, unwind_context, kind, tcx.lang_items().oom().is_some());
         true
     } else {
         false
@@ -31,6 +30,7 @@ fn codegen_inner(
     module: &mut impl Module,
     unwind_context: &mut UnwindContext,
     kind: AllocatorKind,
+    has_alloc_error_handler: bool,
 ) {
     let usize_ty = module.target_config().pointer_type();
 
@@ -65,7 +65,6 @@ fn codegen_inner(
 
         let caller_name = format!("__rust_{}", method.name);
         let callee_name = kind.fn_name(method.name);
-        //eprintln!("Codegen allocator shim {} -> {} ({:?} -> {:?})", caller_name, callee_name, sig.params, sig.returns);
 
         let func_id = module.declare_function(&caller_name, Linkage::Export, &sig).unwrap();
 
@@ -104,13 +103,12 @@ fn codegen_inner(
         returns: vec![],
     };
 
-    let callee_name = kind.fn_name(sym::oom);
-    //eprintln!("Codegen allocator shim {} -> {} ({:?} -> {:?})", caller_name, callee_name, sig.params, sig.returns);
+    let callee_name = if has_alloc_error_handler { "__rg_oom" } else { "__rdl_oom" };
 
     let func_id =
         module.declare_function("__rust_alloc_error_handler", Linkage::Export, &sig).unwrap();
 
-    let callee_func_id = module.declare_function(&callee_name, Linkage::Import, &sig).unwrap();
+    let callee_func_id = module.declare_function(callee_name, Linkage::Import, &sig).unwrap();
 
     let mut ctx = Context::new();
     ctx.func = Function::with_name_signature(ExternalName::user(0, 0), sig);
diff --git a/compiler/rustc_codegen_cranelift/src/analyze.rs b/compiler/rustc_codegen_cranelift/src/analyze.rs
index efead25552f..35b89358b19 100644
--- a/compiler/rustc_codegen_cranelift/src/analyze.rs
+++ b/compiler/rustc_codegen_cranelift/src/analyze.rs
@@ -38,17 +38,6 @@ pub(crate) fn analyze(fx: &FunctionCx<'_, '_, '_>) -> IndexVec<Local, SsaKind> {
                 _ => {}
             }
         }
-
-        match &bb.terminator().kind {
-            TerminatorKind::Call { destination, func, args, .. } => {
-                if let Some((dest_place, _dest_bb)) = destination {
-                    if !crate::abi::can_return_to_ssa_var(fx, func, args) {
-                        not_ssa(&mut flag_map, dest_place.local)
-                    }
-                }
-            }
-            _ => {}
-        }
     }
 
     flag_map
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index 3d78eed77b9..e99a227a3a6 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -334,8 +334,6 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, '_>) {
                         crate::optimize::peephole::maybe_unwrap_bool_not(&mut fx.bcx, discr);
                     let test_zero = if is_inverted { !test_zero } else { test_zero };
                     let discr = crate::optimize::peephole::maybe_unwrap_bint(&mut fx.bcx, discr);
-                    let discr =
-                        crate::optimize::peephole::make_branchable_value(&mut fx.bcx, discr);
                     if let Some(taken) = crate::optimize::peephole::maybe_known_branch_taken(
                         &fx.bcx, discr, test_zero,
                     ) {
diff --git a/compiler/rustc_codegen_cranelift/src/cast.rs b/compiler/rustc_codegen_cranelift/src/cast.rs
index 74c5e09f08d..e7e6afeb865 100644
--- a/compiler/rustc_codegen_cranelift/src/cast.rs
+++ b/compiler/rustc_codegen_cranelift/src/cast.rs
@@ -14,21 +14,6 @@ pub(crate) fn clif_intcast(
         (_, _) if from == to => val,
 
         // extend
-        (_, types::I128) => {
-            let lo = if from == types::I64 {
-                val
-            } else if signed {
-                fx.bcx.ins().sextend(types::I64, val)
-            } else {
-                fx.bcx.ins().uextend(types::I64, val)
-            };
-            let hi = if signed {
-                fx.bcx.ins().sshr_imm(lo, 63)
-            } else {
-                fx.bcx.ins().iconst(types::I64, 0)
-            };
-            fx.bcx.ins().iconcat(lo, hi)
-        }
         (_, _) if to.wider_or_equal(from) => {
             if signed {
                 fx.bcx.ins().sextend(to, val)
@@ -38,10 +23,6 @@ pub(crate) fn clif_intcast(
         }
 
         // reduce
-        (types::I128, _) => {
-            let (lsb, _msb) = fx.bcx.ins().isplit(val);
-            if to == types::I64 { lsb } else { fx.bcx.ins().ireduce(to, lsb) }
-        }
         (_, _) => fx.bcx.ins().ireduce(to, val),
     }
 }
diff --git a/compiler/rustc_codegen_cranelift/src/codegen_i128.rs b/compiler/rustc_codegen_cranelift/src/codegen_i128.rs
index ffe1922ab90..638b2d573b5 100644
--- a/compiler/rustc_codegen_cranelift/src/codegen_i128.rs
+++ b/compiler/rustc_codegen_cranelift/src/codegen_i128.rs
@@ -19,9 +19,6 @@ pub(crate) fn maybe_codegen<'tcx>(
         return None;
     }
 
-    let lhs_val = lhs.load_scalar(fx);
-    let rhs_val = rhs.load_scalar(fx);
-
     let is_signed = type_sign(lhs.layout().ty);
 
     match bin_op {
@@ -30,29 +27,53 @@ pub(crate) fn maybe_codegen<'tcx>(
             None
         }
         BinOp::Add | BinOp::Sub if !checked => None,
-        BinOp::Mul if !checked => {
-            let val_ty = if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 };
-            if fx.tcx.sess.target.is_like_windows {
-                let ret_place = CPlace::new_stack_slot(fx, lhs.layout());
-                let (lhs_ptr, lhs_extra) = lhs.force_stack(fx);
-                let (rhs_ptr, rhs_extra) = rhs.force_stack(fx);
-                assert!(lhs_extra.is_none());
-                assert!(rhs_extra.is_none());
-                let args =
-                    [ret_place.to_ptr().get_addr(fx), lhs_ptr.get_addr(fx), rhs_ptr.get_addr(fx)];
-                fx.lib_call(
-                    "__multi3",
+        BinOp::Mul if !checked || is_signed => {
+            if !checked {
+                let val_ty = if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 };
+                if fx.tcx.sess.target.is_like_windows {
+                    let ret_place = CPlace::new_stack_slot(fx, lhs.layout());
+                    let (lhs_ptr, lhs_extra) = lhs.force_stack(fx);
+                    let (rhs_ptr, rhs_extra) = rhs.force_stack(fx);
+                    assert!(lhs_extra.is_none());
+                    assert!(rhs_extra.is_none());
+                    let args = [
+                        ret_place.to_ptr().get_addr(fx),
+                        lhs_ptr.get_addr(fx),
+                        rhs_ptr.get_addr(fx),
+                    ];
+                    fx.lib_call(
+                        "__multi3",
+                        vec![
+                            AbiParam::special(fx.pointer_type, ArgumentPurpose::StructReturn),
+                            AbiParam::new(fx.pointer_type),
+                            AbiParam::new(fx.pointer_type),
+                        ],
+                        vec![],
+                        &args,
+                    );
+                    Some(ret_place.to_cvalue(fx))
+                } else {
+                    Some(fx.easy_call("__multi3", &[lhs, rhs], val_ty))
+                }
+            } else {
+                let out_ty = fx.tcx.mk_tup([lhs.layout().ty, fx.tcx.types.bool].iter());
+                let oflow = CPlace::new_stack_slot(fx, fx.layout_of(fx.tcx.types.i32));
+                let lhs = lhs.load_scalar(fx);
+                let rhs = rhs.load_scalar(fx);
+                let oflow_ptr = oflow.to_ptr().get_addr(fx);
+                let res = fx.lib_call(
+                    "__muloti4",
                     vec![
-                        AbiParam::special(pointer_ty(fx.tcx), ArgumentPurpose::StructReturn),
-                        AbiParam::new(pointer_ty(fx.tcx)),
-                        AbiParam::new(pointer_ty(fx.tcx)),
+                        AbiParam::new(types::I128),
+                        AbiParam::new(types::I128),
+                        AbiParam::new(fx.pointer_type),
                     ],
-                    vec![],
-                    &args,
-                );
-                Some(ret_place.to_cvalue(fx))
-            } else {
-                Some(fx.easy_call("__multi3", &[lhs, rhs], val_ty))
+                    vec![AbiParam::new(types::I128)],
+                    &[lhs, rhs, oflow_ptr],
+                )[0];
+                let oflow = oflow.to_cvalue(fx).load_scalar(fx);
+                let oflow = fx.bcx.ins().ireduce(types::I8, oflow);
+                Some(CValue::by_val_pair(res, oflow, fx.layout_of(out_ty)))
             }
         }
         BinOp::Add | BinOp::Sub | BinOp::Mul => {
@@ -66,16 +87,16 @@ pub(crate) fn maybe_codegen<'tcx>(
                 assert!(rhs_extra.is_none());
                 (
                     vec![
-                        AbiParam::special(pointer_ty(fx.tcx), ArgumentPurpose::StructReturn),
-                        AbiParam::new(pointer_ty(fx.tcx)),
-                        AbiParam::new(pointer_ty(fx.tcx)),
+                        AbiParam::special(fx.pointer_type, ArgumentPurpose::StructReturn),
+                        AbiParam::new(fx.pointer_type),
+                        AbiParam::new(fx.pointer_type),
                     ],
                     [out_place.to_ptr().get_addr(fx), lhs_ptr.get_addr(fx), rhs_ptr.get_addr(fx)],
                 )
             } else {
                 (
                     vec![
-                        AbiParam::special(pointer_ty(fx.tcx), ArgumentPurpose::StructReturn),
+                        AbiParam::special(fx.pointer_type, ArgumentPurpose::StructReturn),
                         AbiParam::new(types::I128),
                         AbiParam::new(types::I128),
                     ],
@@ -88,7 +109,6 @@ pub(crate) fn maybe_codegen<'tcx>(
                 (BinOp::Sub, false) => "__rust_u128_subo",
                 (BinOp::Sub, true) => "__rust_i128_subo",
                 (BinOp::Mul, false) => "__rust_u128_mulo",
-                (BinOp::Mul, true) => "__rust_i128_mulo",
                 _ => unreachable!(),
             };
             fx.lib_call(name, param_types, vec![], &args);
@@ -112,7 +132,7 @@ pub(crate) fn maybe_codegen<'tcx>(
                 let args = [lhs_ptr.get_addr(fx), rhs_ptr.get_addr(fx)];
                 let ret = fx.lib_call(
                     name,
-                    vec![AbiParam::new(pointer_ty(fx.tcx)), AbiParam::new(pointer_ty(fx.tcx))],
+                    vec![AbiParam::new(fx.pointer_type), AbiParam::new(fx.pointer_type)],
                     vec![AbiParam::new(types::I64X2)],
                     &args,
                 )[0];
@@ -128,40 +148,6 @@ pub(crate) fn maybe_codegen<'tcx>(
             assert!(!checked);
             None
         }
-        BinOp::Shl | BinOp::Shr => {
-            let is_overflow = if checked {
-                // rhs >= 128
-
-                // FIXME support non 128bit rhs
-                /*let (rhs_lsb, rhs_msb) = fx.bcx.ins().isplit(rhs_val);
-                let rhs_msb_gt_0 = fx.bcx.ins().icmp_imm(IntCC::NotEqual, rhs_msb, 0);
-                let rhs_lsb_ge_128 = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThan, rhs_lsb, 127);
-                let is_overflow = fx.bcx.ins().bor(rhs_msb_gt_0, rhs_lsb_ge_128);*/
-                let is_overflow = fx.bcx.ins().bconst(types::B1, false);
-
-                Some(fx.bcx.ins().bint(types::I8, is_overflow))
-            } else {
-                None
-            };
-
-            let truncated_rhs = clif_intcast(fx, rhs_val, types::I32, false);
-            let val = match bin_op {
-                BinOp::Shl => fx.bcx.ins().ishl(lhs_val, truncated_rhs),
-                BinOp::Shr => {
-                    if is_signed {
-                        fx.bcx.ins().sshr(lhs_val, truncated_rhs)
-                    } else {
-                        fx.bcx.ins().ushr(lhs_val, truncated_rhs)
-                    }
-                }
-                _ => unreachable!(),
-            };
-            if let Some(is_overflow) = is_overflow {
-                let out_ty = fx.tcx.mk_tup([lhs.layout().ty, fx.tcx.types.bool].iter());
-                Some(CValue::by_val_pair(val, is_overflow, fx.layout_of(out_ty)))
-            } else {
-                Some(CValue::by_val(val, lhs.layout()))
-            }
-        }
+        BinOp::Shl | BinOp::Shr => None,
     }
 }
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs
index 6018eefcd42..fb6ccd7c535 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs
@@ -160,7 +160,7 @@ impl Writer for WriterRelocate {
                 let val = match eh_pe.application() {
                     gimli::DW_EH_PE_absptr => val,
                     gimli::DW_EH_PE_pcrel => {
-                        // TODO: better handling of sign
+                        // FIXME better handling of sign
                         let offset = self.len() as u64;
                         offset.wrapping_sub(val)
                     }
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
index c67336eb3f2..ceef65d5478 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
@@ -46,7 +46,7 @@ impl<'tcx> DebugContext<'tcx> {
     pub(crate) fn new(tcx: TyCtxt<'tcx>, isa: &dyn TargetIsa) -> Self {
         let encoding = Encoding {
             format: Format::Dwarf32,
-            // TODO: this should be configurable
+            // FIXME this should be configurable
             // macOS doesn't seem to support DWARF > 3
             // 5 version is required for md5 file hash
             version: if tcx.sess.target.is_like_osx {
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index 3979886e10c..86698460747 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -175,12 +175,11 @@ fn simd_for_each_lane<'tcx>(
     assert_eq!(lane_count, ret_lane_count);
 
     for lane_idx in 0..lane_count {
-        let lane_idx = mir::Field::new(lane_idx.try_into().unwrap());
-        let lane = val.value_field(fx, lane_idx).load_scalar(fx);
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
 
         let res_lane = f(fx, lane_layout, ret_lane_layout, lane);
 
-        ret.place_field(fx, lane_idx).write_cvalue(fx, res_lane);
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
     }
 }
 
@@ -206,20 +205,20 @@ fn simd_pair_for_each_lane<'tcx>(
     let ret_lane_layout = fx.layout_of(ret_lane_ty);
     assert_eq!(lane_count, ret_lane_count);
 
-    for lane in 0..lane_count {
-        let lane = mir::Field::new(lane.try_into().unwrap());
-        let x_lane = x.value_field(fx, lane).load_scalar(fx);
-        let y_lane = y.value_field(fx, lane).load_scalar(fx);
+    for lane_idx in 0..lane_count {
+        let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx);
+        let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx);
 
         let res_lane = f(fx, lane_layout, ret_lane_layout, x_lane, y_lane);
 
-        ret.place_field(fx, lane).write_cvalue(fx, res_lane);
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
     }
 }
 
 fn simd_reduce<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
+    acc: Option<Value>,
     ret: CPlace<'tcx>,
     f: impl Fn(&mut FunctionCx<'_, '_, 'tcx>, TyAndLayout<'tcx>, Value, Value) -> Value,
 ) {
@@ -227,16 +226,17 @@ fn simd_reduce<'tcx>(
     let lane_layout = fx.layout_of(lane_ty);
     assert_eq!(lane_layout, ret.layout());
 
-    let mut res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
-    for lane_idx in 1..lane_count {
-        let lane =
-            val.value_field(fx, mir::Field::new(lane_idx.try_into().unwrap())).load_scalar(fx);
+    let (mut res_val, start_lane) =
+        if let Some(acc) = acc { (acc, 0) } else { (val.value_lane(fx, 0).load_scalar(fx), 1) };
+    for lane_idx in start_lane..lane_count {
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
         res_val = f(fx, lane_layout, res_val, lane);
     }
     let res = CValue::by_val(res_val, lane_layout);
     ret.write_cvalue(fx, res);
 }
 
+// FIXME move all uses to `simd_reduce`
 fn simd_reduce_bool<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     val: CValue<'tcx>,
@@ -246,14 +246,18 @@ fn simd_reduce_bool<'tcx>(
     let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
     assert!(ret.layout().ty.is_bool());
 
-    let res_val = val.value_field(fx, mir::Field::new(0)).load_scalar(fx);
+    let res_val = val.value_lane(fx, 0).load_scalar(fx);
     let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean
     for lane_idx in 1..lane_count {
-        let lane =
-            val.value_field(fx, mir::Field::new(lane_idx.try_into().unwrap())).load_scalar(fx);
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
         let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean
         res_val = f(fx, res_val, lane);
     }
+    let res_val = if fx.bcx.func.dfg.value_type(res_val) != types::I8 {
+        fx.bcx.ins().ireduce(types::I8, res_val)
+    } else {
+        res_val
+    };
     let res = CValue::by_val(res_val, ret.layout());
     ret.write_cvalue(fx, res);
 }
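Illustrative sketch (not part of the patch): the fold that `simd_reduce` performs, in plain Rust. With an explicit accumulator (as used by the ordered add/mul reductions) every lane is folded into it; without one, lane 0 seeds the result and folding starts at lane 1.

```rust
fn simd_reduce<T: Copy>(lanes: &[T], acc: Option<T>, f: impl Fn(T, T) -> T) -> T {
    let (mut res, start) = match acc {
        Some(acc) => (acc, 0),
        None => (lanes[0], 1),
    };
    for &lane in &lanes[start..] {
        res = f(res, lane);
    }
    res
}

fn main() {
    // simd_reduce_add_ordered over <1, 2, 3, 4> with accumulator 10
    assert_eq!(simd_reduce(&[1, 2, 3, 4], Some(10), |a, b| a + b), 20);
    // simd_reduce_and takes no accumulator
    assert_eq!(simd_reduce(&[0b1110u8, 0b0111], None, |a, b| a & b), 0b0110);
}
```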
@@ -288,7 +292,11 @@ macro simd_cmp {
         if let Some(vector_ty) = vector_ty {
             let x = $x.load_scalar($fx);
             let y = $y.load_scalar($fx);
-            let val = $fx.bcx.ins().icmp(IntCC::$cc, x, y);
+            let val = if vector_ty.lane_type().is_float() {
+                $fx.bcx.ins().fcmp(FloatCC::$cc_f, x, y)
+            } else {
+                $fx.bcx.ins().icmp(IntCC::$cc, x, y)
+            };
 
             // HACK This depends on the fact that icmp for vectors represents bools as 0 and !0, not 0 and 1.
             let val = $fx.bcx.ins().raw_bitcast(vector_ty, val);
@@ -603,9 +611,6 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             let (val, has_overflow) = checked_res.load_scalar_pair(fx);
             let clif_ty = fx.clif_type(T).unwrap();
 
-            // `select.i8` is not implemented by Cranelift.
-            let has_overflow = fx.bcx.ins().uextend(types::I32, has_overflow);
-
             let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed);
 
             let val = match (intrinsic, signed) {
@@ -632,21 +637,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         };
         rotate_left, <T>(v x, v y) {
             let layout = fx.layout_of(T);
-            let y = if fx.bcx.func.dfg.value_type(y) == types::I128 {
-                fx.bcx.ins().ireduce(types::I64, y)
-            } else {
-                y
-            };
             let res = fx.bcx.ins().rotl(x, y);
             ret.write_cvalue(fx, CValue::by_val(res, layout));
         };
         rotate_right, <T>(v x, v y) {
             let layout = fx.layout_of(T);
-            let y = if fx.bcx.func.dfg.value_type(y) == types::I128 {
-                fx.bcx.ins().ireduce(types::I64, y)
-            } else {
-                y
-            };
             let res = fx.bcx.ins().rotr(x, y);
             ret.write_cvalue(fx, CValue::by_val(res, layout));
         };
@@ -684,35 +679,13 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         };
         ctlz | ctlz_nonzero, <T> (v arg) {
             // FIXME trap on `ctlz_nonzero` with zero arg.
-            let res = if T == fx.tcx.types.u128 || T == fx.tcx.types.i128 {
-                // FIXME verify this algorithm is correct
-                let (lsb, msb) = fx.bcx.ins().isplit(arg);
-                let lsb_lz = fx.bcx.ins().clz(lsb);
-                let msb_lz = fx.bcx.ins().clz(msb);
-                let msb_is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, msb, 0);
-                let lsb_lz_plus_64 = fx.bcx.ins().iadd_imm(lsb_lz, 64);
-                let res = fx.bcx.ins().select(msb_is_zero, lsb_lz_plus_64, msb_lz);
-                fx.bcx.ins().uextend(types::I128, res)
-            } else {
-                fx.bcx.ins().clz(arg)
-            };
+            let res = fx.bcx.ins().clz(arg);
             let res = CValue::by_val(res, fx.layout_of(T));
             ret.write_cvalue(fx, res);
         };
         cttz | cttz_nonzero, <T> (v arg) {
             // FIXME trap on `cttz_nonzero` with zero arg.
-            let res = if T == fx.tcx.types.u128 || T == fx.tcx.types.i128 {
-                // FIXME verify this algorithm is correct
-                let (lsb, msb) = fx.bcx.ins().isplit(arg);
-                let lsb_tz = fx.bcx.ins().ctz(lsb);
-                let msb_tz = fx.bcx.ins().ctz(msb);
-                let lsb_is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, lsb, 0);
-                let msb_tz_plus_64 = fx.bcx.ins().iadd_imm(msb_tz, 64);
-                let res = fx.bcx.ins().select(lsb_is_zero, msb_tz_plus_64, lsb_tz);
-                fx.bcx.ins().uextend(types::I128, res)
-            } else {
-                fx.bcx.ins().ctz(arg)
-            };
+            let res = fx.bcx.ins().ctz(arg);
             let res = CValue::by_val(res, fx.layout_of(T));
             ret.write_cvalue(fx, res);
         };
@@ -995,8 +968,6 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             let old = CValue::by_val(old, layout);
             ret.write_cvalue(fx, old);
         };
-
-        // FIXME https://github.com/bytecodealliance/wasmtime/issues/2647
         _ if intrinsic.as_str().starts_with("atomic_nand"), (v ptr, c src) {
             let layout = src.layout();
             validate_atomic_type!(fx, intrinsic, span, layout.ty);
@@ -1058,23 +1029,39 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, old);
         };
 
+        // In Rust, floating point min and max don't propagate NaN; in Cranelift they do.
+        // For this reason it is necessary to use `a.is_nan() ? b : (a >= b ? b : a)` for `minnumf*`
+        // and `a.is_nan() ? b : (a <= b ? b : a)` for `maxnumf*`. NaN checks are done by comparing
+        // a float against itself: only NaN is not equal to itself.
         minnumf32, (v a, v b) {
-            let val = fx.bcx.ins().fmin(a, b);
+            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+            let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b);
+            let temp = fx.bcx.ins().select(a_ge_b, b, a);
+            let val = fx.bcx.ins().select(a_is_nan, b, temp);
             let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
             ret.write_cvalue(fx, val);
         };
         minnumf64, (v a, v b) {
-            let val = fx.bcx.ins().fmin(a, b);
+            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+            let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b);
+            let temp = fx.bcx.ins().select(a_ge_b, b, a);
+            let val = fx.bcx.ins().select(a_is_nan, b, temp);
             let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
             ret.write_cvalue(fx, val);
         };
         maxnumf32, (v a, v b) {
-            let val = fx.bcx.ins().fmax(a, b);
+            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+            let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b);
+            let temp = fx.bcx.ins().select(a_le_b, b, a);
+            let val = fx.bcx.ins().select(a_is_nan, b, temp);
             let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
             ret.write_cvalue(fx, val);
         };
         maxnumf64, (v a, v b) {
-            let val = fx.bcx.ins().fmax(a, b);
+            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+            let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b);
+            let temp = fx.bcx.ins().select(a_le_b, b, a);
+            let val = fx.bcx.ins().select(a_is_nan, b, temp);
             let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
             ret.write_cvalue(fx, val);
         };
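Illustrative sketch (not part of the patch): the select chains above written as plain Rust, next to the behaviour they are meant to match (`f32::min` and `f32::max` return the non-NaN operand).

```rust
fn minnumf32(a: f32, b: f32) -> f32 {
    let a_is_nan = a != a; // only NaN compares unequal to itself
    let temp = if a >= b { b } else { a };
    if a_is_nan { b } else { temp }
}

fn maxnumf32(a: f32, b: f32) -> f32 {
    let a_is_nan = a != a;
    let temp = if a <= b { b } else { a };
    if a_is_nan { b } else { temp }
}

fn main() {
    assert_eq!(minnumf32(1.0, 2.0), 1.0);
    assert_eq!(maxnumf32(1.0, 2.0), 2.0);
    // NaN is not propagated, matching `f32::min`/`f32::max`.
    assert_eq!(minnumf32(f32::NAN, 2.0), 2.0);
    assert_eq!(maxnumf32(1.0, f32::NAN), 1.0);
    assert_eq!(1.0f32.min(f32::NAN), 1.0);
}
```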
@@ -1122,6 +1109,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             }
 
             let size = fx.layout_of(T).layout.size;
+            // FIXME add and use emit_small_memcmp
             let is_eq_value =
                 if size == Size::ZERO {
                     // No bytes means they're trivially equal
@@ -1137,10 +1125,9 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
                 } else {
                     // Just call `memcmp` (like slices do in core) when the
                     // size is too large or it's not a power-of-two.
-                    let ptr_ty = pointer_ty(fx.tcx);
                     let signed_bytes = i64::try_from(size.bytes()).unwrap();
-                    let bytes_val = fx.bcx.ins().iconst(ptr_ty, signed_bytes);
-                    let params = vec![AbiParam::new(ptr_ty); 3];
+                    let bytes_val = fx.bcx.ins().iconst(fx.pointer_type, signed_bytes);
+                    let params = vec![AbiParam::new(fx.pointer_type); 3];
                     let returns = vec![AbiParam::new(types::I32)];
                     let args = &[lhs_ref, rhs_ref, bytes_val];
                     let cmp = fx.lib_call("memcmp", params, returns, args)[0];
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index c2f469fa021..43e68b4afa9 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -108,11 +108,11 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
 
             for (out_idx, in_idx) in indexes.into_iter().enumerate() {
                 let in_lane = if u64::from(in_idx) < lane_count {
-                    x.value_field(fx, mir::Field::new(in_idx.into()))
+                    x.value_lane(fx, in_idx.into())
                 } else {
-                    y.value_field(fx, mir::Field::new(usize::from(in_idx) - usize::try_from(lane_count).unwrap()))
+                    y.value_lane(fx, u64::from(in_idx) - lane_count)
                 };
-                let out_lane = ret.place_field(fx, mir::Field::new(out_idx));
+                let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
                 out_lane.write_cvalue(fx, in_lane);
             }
         };
@@ -163,10 +163,38 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                 fx.tcx.sess.span_fatal(fx.mir.span, &format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count));
             }
 
-            let ret_lane = v.value_field(fx, mir::Field::new(idx.try_into().unwrap()));
+            let ret_lane = v.value_lane(fx, idx.try_into().unwrap());
             ret.write_cvalue(fx, ret_lane);
         };
 
+        simd_neg, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
+                let ret_lane = match lane_layout.ty.kind() {
+                    ty::Int(_) => fx.bcx.ins().ineg(lane),
+                    ty::Float(_) => fx.bcx.ins().fneg(lane),
+                    _ => unreachable!(),
+                };
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+
+        simd_fabs, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().fabs(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+
+        simd_fsqrt, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().sqrt(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+
         simd_add, (c x, c y) {
             validate_simd_type!(fx, intrinsic, span, x.layout().ty);
             simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret);
@@ -183,6 +211,29 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             validate_simd_type!(fx, intrinsic, span, x.layout().ty);
             simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret);
         };
+        simd_rem, (c x, c y) {
+            validate_simd_type!(fx, intrinsic, span, x.layout().ty);
+            simd_pair_for_each_lane(fx, x, y, ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
+                let res_lane = match lane_layout.ty.kind() {
+                    ty::Uint(_) => fx.bcx.ins().urem(x_lane, y_lane),
+                    ty::Int(_) => fx.bcx.ins().srem(x_lane, y_lane),
+                    ty::Float(FloatTy::F32) => fx.lib_call(
+                        "fmodf",
+                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
+                        vec![AbiParam::new(types::F32)],
+                        &[x_lane, y_lane],
+                    )[0],
+                    ty::Float(FloatTy::F64) => fx.lib_call(
+                        "fmod",
+                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
+                        vec![AbiParam::new(types::F64)],
+                        &[x_lane, y_lane],
+                    )[0],
+                    _ => unreachable!("{:?}", lane_layout.ty),
+                };
+                CValue::by_val(res_lane, ret_lane_layout)
+            });
+        };
         simd_shl, (c x, c y) {
             validate_simd_type!(fx, intrinsic, span, x.layout().ty);
             simd_int_binop!(fx, ishl(x, y) -> ret);
@@ -216,15 +267,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             let ret_lane_layout = fx.layout_of(ret_lane_ty);
 
             for lane in 0..lane_count {
-                let lane = mir::Field::new(lane.try_into().unwrap());
-                let a_lane = a.value_field(fx, lane).load_scalar(fx);
-                let b_lane = b.value_field(fx, lane).load_scalar(fx);
-                let c_lane = c.value_field(fx, lane).load_scalar(fx);
+                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+                let c_lane = c.value_lane(fx, lane).load_scalar(fx);
 
                 let mul_lane = fx.bcx.ins().fmul(a_lane, b_lane);
                 let res_lane = CValue::by_val(fx.bcx.ins().fadd(mul_lane, c_lane), ret_lane_layout);
 
-                ret.place_field(fx, lane).write_cvalue(fx, res_lane);
+                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
             }
         };
 
@@ -237,9 +287,52 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             simd_flt_binop!(fx, fmax(x, y) -> ret);
         };
 
-        simd_reduce_add_ordered | simd_reduce_add_unordered, (c v) {
+        simd_round, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
+                let res_lane = match lane_layout.ty.kind() {
+                    ty::Float(FloatTy::F32) => fx.lib_call(
+                        "roundf",
+                        vec![AbiParam::new(types::F32)],
+                        vec![AbiParam::new(types::F32)],
+                        &[lane],
+                    )[0],
+                    ty::Float(FloatTy::F64) => fx.lib_call(
+                        "round",
+                        vec![AbiParam::new(types::F64)],
+                        vec![AbiParam::new(types::F64)],
+                        &[lane],
+                    )[0],
+                    _ => unreachable!("{:?}", lane_layout.ty),
+                };
+                CValue::by_val(res_lane, ret_lane_layout)
+            });
+        };
+        simd_ceil, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().ceil(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+        simd_floor, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().floor(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+        simd_trunc, (c a) {
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            simd_for_each_lane(fx, a, ret, |fx, _lane_layout, ret_lane_layout, lane| {
+                let ret_lane = fx.bcx.ins().trunc(lane);
+                CValue::by_val(ret_lane, ret_lane_layout)
+            });
+        };
+
+        simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
             validate_simd_type!(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+            simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fadd(a, b)
                 } else {
@@ -248,9 +341,9 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         };
 
-        simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v) {
+        simd_reduce_mul_ordered | simd_reduce_mul_unordered, (c v, v acc) {
             validate_simd_type!(fx, intrinsic, span, v.layout().ty);
-            simd_reduce(fx, v, ret, |fx, lane_layout, a, b| {
+            simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
                 if lane_layout.ty.is_floating_point() {
                     fx.bcx.ins().fmul(a, b)
                 } else {
@@ -269,13 +362,70 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
         };
 
-        // simd_fabs
-        // simd_saturating_add
+        simd_reduce_and, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().band(a, b));
+        };
+
+        simd_reduce_or, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bor(a, b));
+        };
+
+        simd_reduce_xor, (c v) {
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, _layout, a, b| fx.bcx.ins().bxor(a, b));
+        };
+
+        simd_reduce_min, (c v) {
+            // FIXME support floats
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+                let lt = fx.bcx.ins().icmp(if layout.ty.is_signed() {
+                    IntCC::SignedLessThan
+                } else {
+                    IntCC::UnsignedLessThan
+                }, a, b);
+                fx.bcx.ins().select(lt, a, b)
+            });
+        };
+
+        simd_reduce_max, (c v) {
+            // FIXME support floats
+            validate_simd_type!(fx, intrinsic, span, v.layout().ty);
+            simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
+                let gt = fx.bcx.ins().icmp(if layout.ty.is_signed() {
+                    IntCC::SignedGreaterThan
+                } else {
+                    IntCC::UnsignedGreaterThan
+                }, a, b);
+                fx.bcx.ins().select(gt, a, b)
+            });
+        };
+
+        simd_select, (c m, c a, c b) {
+            validate_simd_type!(fx, intrinsic, span, m.layout().ty);
+            validate_simd_type!(fx, intrinsic, span, a.layout().ty);
+            assert_eq!(a.layout(), b.layout());
+
+            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+            let lane_layout = fx.layout_of(lane_ty);
+
+            for lane in 0..lane_count {
+                let m_lane = m.value_lane(fx, lane).load_scalar(fx);
+                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+
+                let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
+                let res_lane = CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);
+
+                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
+            }
+        };
+
+        // simd_saturating_*
         // simd_bitmask
-        // simd_select
-        // simd_rem
-        // simd_neg
-        // simd_trunc
-        // simd_floor
+        // simd_scatter
+        // simd_gather
     }
 }
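Illustrative sketch (not part of the patch): the lane-wise behaviour of the `simd_select` lowering above. A mask lane equal to 0 picks the lane from `b`; any non-zero mask lane picks it from `a`. The helper below is invented for the example.

```rust
fn simd_select<const N: usize>(m: [i32; N], a: [i32; N], b: [i32; N]) -> [i32; N] {
    let mut out = [0; N];
    for lane in 0..N {
        out[lane] = if m[lane] == 0 { b[lane] } else { a[lane] };
    }
    out
}

fn main() {
    let m = [0, -1, 0, 1];
    let a = [10, 20, 30, 40];
    let b = [1, 2, 3, 4];
    assert_eq!(simd_select(m, a, b), [1, 20, 3, 40]);
}
```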
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
index e32dae49131..4ef53663ca0 100644
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -184,6 +184,9 @@ impl CodegenBackend for CraneliftCodegenBackend {
         let config = if let Some(config) = self.config.clone() {
             config
         } else {
+            if !tcx.sess.unstable_options() && !tcx.sess.opts.cg.llvm_args.is_empty() {
+                tcx.sess.fatal("`-Z unstable-options` must be passed to allow configuring cg_clif");
+            }
             BackendConfig::from_opts(&tcx.sess.opts.cg.llvm_args)
                 .unwrap_or_else(|err| tcx.sess.fatal(&err))
         };
@@ -217,16 +220,15 @@ impl CodegenBackend for CraneliftCodegenBackend {
     ) -> Result<(), ErrorReported> {
         use rustc_codegen_ssa::back::link::link_binary;
 
-        link_binary::<crate::archive::ArArchiveBuilder<'_>>(
-            sess,
-            &codegen_results,
-            outputs,
-        )
+        link_binary::<crate::archive::ArArchiveBuilder<'_>>(sess, &codegen_results, outputs)
     }
 }
 
 fn target_triple(sess: &Session) -> target_lexicon::Triple {
-    sess.target.llvm_target.parse().unwrap()
+    match sess.target.llvm_target.parse() {
+        Ok(triple) => triple,
+        Err(err) => sess.fatal(&format!("target not recognized: {}", err)),
+    }
 }
 
 fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Box<dyn isa::TargetIsa + 'static> {
@@ -276,15 +278,21 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Box<dyn isa::Tar
         }
         Some(value) => {
             let mut builder =
-                cranelift_codegen::isa::lookup_variant(target_triple, variant).unwrap();
+                cranelift_codegen::isa::lookup_variant(target_triple.clone(), variant)
+                    .unwrap_or_else(|err| {
+                        sess.fatal(&format!("can't compile for {}: {}", target_triple, err));
+                    });
             if let Err(_) = builder.enable(value) {
-                sess.fatal("The specified target cpu isn't currently supported by Cranelift.");
+                sess.fatal("the specified target cpu isn't currently supported by Cranelift.");
             }
             builder
         }
         None => {
             let mut builder =
-                cranelift_codegen::isa::lookup_variant(target_triple.clone(), variant).unwrap();
+                cranelift_codegen::isa::lookup_variant(target_triple.clone(), variant)
+                    .unwrap_or_else(|err| {
+                        sess.fatal(&format!("can't compile for {}: {}", target_triple, err));
+                    });
             if target_triple.architecture == target_lexicon::Architecture::X86_64 {
                 // Don't use "haswell" as the default, as it implies `has_lzcnt`.
                 // macOS CI is still at Ivy Bridge EP, so `lzcnt` is interpreted as `bsr`.
diff --git a/compiler/rustc_codegen_cranelift/src/num.rs b/compiler/rustc_codegen_cranelift/src/num.rs
index b6d378a5fe1..545d390e269 100644
--- a/compiler/rustc_codegen_cranelift/src/num.rs
+++ b/compiler/rustc_codegen_cranelift/src/num.rs
@@ -67,19 +67,6 @@ pub(crate) fn codegen_binop<'tcx>(
                     let lhs = in_lhs.load_scalar(fx);
                     let rhs = in_rhs.load_scalar(fx);
 
-                    let (lhs, rhs) = if (bin_op == BinOp::Eq || bin_op == BinOp::Ne)
-                        && (in_lhs.layout().ty.kind() == fx.tcx.types.i8.kind()
-                            || in_lhs.layout().ty.kind() == fx.tcx.types.i16.kind())
-                    {
-                        // FIXME(CraneStation/cranelift#896) icmp_imm.i8/i16 with eq/ne for signed ints is implemented wrong.
-                        (
-                            fx.bcx.ins().sextend(types::I32, lhs),
-                            fx.bcx.ins().sextend(types::I32, rhs),
-                        )
-                    } else {
-                        (lhs, rhs)
-                    };
-
                     return codegen_compare_bin_op(fx, bin_op, signed, lhs, rhs);
                 }
                 _ => {}
@@ -293,9 +280,8 @@ pub(crate) fn codegen_checked_int_binop<'tcx>(
         }
         BinOp::Shl => {
             let lhs_ty = fx.bcx.func.dfg.value_type(lhs);
-            let actual_shift = fx.bcx.ins().band_imm(rhs, i64::from(lhs_ty.bits() - 1));
-            let actual_shift = clif_intcast(fx, actual_shift, types::I8, false);
-            let val = fx.bcx.ins().ishl(lhs, actual_shift);
+            let masked_shift = fx.bcx.ins().band_imm(rhs, i64::from(lhs_ty.bits() - 1));
+            let val = fx.bcx.ins().ishl(lhs, masked_shift);
             let ty = fx.bcx.func.dfg.value_type(val);
             let max_shift = i64::from(ty.bits()) - 1;
             let has_overflow = fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThan, rhs, max_shift);
@@ -303,12 +289,11 @@ pub(crate) fn codegen_checked_int_binop<'tcx>(
         }
         BinOp::Shr => {
             let lhs_ty = fx.bcx.func.dfg.value_type(lhs);
-            let actual_shift = fx.bcx.ins().band_imm(rhs, i64::from(lhs_ty.bits() - 1));
-            let actual_shift = clif_intcast(fx, actual_shift, types::I8, false);
+            let masked_shift = fx.bcx.ins().band_imm(rhs, i64::from(lhs_ty.bits() - 1));
             let val = if !signed {
-                fx.bcx.ins().ushr(lhs, actual_shift)
+                fx.bcx.ins().ushr(lhs, masked_shift)
             } else {
-                fx.bcx.ins().sshr(lhs, actual_shift)
+                fx.bcx.ins().sshr(lhs, masked_shift)
             };
             let ty = fx.bcx.func.dfg.value_type(val);
             let max_shift = i64::from(ty.bits()) - 1;
diff --git a/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
index b95e2d72877..d637b4d8929 100644
--- a/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
+++ b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
@@ -1,8 +1,6 @@
 //! Peephole optimizations that can be performed while creating clif ir.
 
-use cranelift_codegen::ir::{
-    condcodes::IntCC, types, InstBuilder, InstructionData, Opcode, Value, ValueDef,
-};
+use cranelift_codegen::ir::{condcodes::IntCC, InstructionData, Opcode, Value, ValueDef};
 use cranelift_frontend::FunctionBuilder;
 
 /// If the given value was produced by a `bint` instruction, return its input, otherwise return the
@@ -37,43 +35,6 @@ pub(crate) fn maybe_unwrap_bool_not(bcx: &mut FunctionBuilder<'_>, arg: Value) -
     }
 }
 
-pub(crate) fn make_branchable_value(bcx: &mut FunctionBuilder<'_>, arg: Value) -> Value {
-    if bcx.func.dfg.value_type(arg).is_bool() {
-        return arg;
-    }
-
-    (|| {
-        let arg_inst = if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
-            arg_inst
-        } else {
-            return None;
-        };
-
-        match bcx.func.dfg[arg_inst] {
-            // This is the lowering of Rvalue::Not
-            InstructionData::Load { opcode: Opcode::Load, arg: ptr, flags, offset } => {
-                // Using `load.i8 + uextend.i32` would legalize to `uload8 + ireduce.i8 +
-                // uextend.i32`. Just `uload8` is much faster.
-                match bcx.func.dfg.ctrl_typevar(arg_inst) {
-                    types::I8 => Some(bcx.ins().uload8(types::I32, flags, ptr, offset)),
-                    types::I16 => Some(bcx.ins().uload16(types::I32, flags, ptr, offset)),
-                    _ => None,
-                }
-            }
-            _ => None,
-        }
-    })()
-    .unwrap_or_else(|| {
-        match bcx.func.dfg.value_type(arg) {
-            types::I8 | types::I16 => {
-                // WORKAROUND for brz.i8 and brnz.i8 not yet being implemented
-                bcx.ins().uextend(types::I32, arg)
-            }
-            _ => arg,
-        }
-    })
-}
-
 /// Returns whether the branch is statically known to be taken or `None` if it isn't statically known.
 pub(crate) fn maybe_known_branch_taken(
     bcx: &FunctionBuilder<'_>,
diff --git a/compiler/rustc_codegen_cranelift/src/trap.rs b/compiler/rustc_codegen_cranelift/src/trap.rs
index 21d3e68dbc7..fe8d20fa39f 100644
--- a/compiler/rustc_codegen_cranelift/src/trap.rs
+++ b/compiler/rustc_codegen_cranelift/src/trap.rs
@@ -10,7 +10,7 @@ fn codegen_print(fx: &mut FunctionCx<'_, '_, '_>, msg: &str) {
             Linkage::Import,
             &Signature {
                 call_conv: CallConv::triple_default(fx.triple()),
-                params: vec![AbiParam::new(pointer_ty(fx.tcx))],
+                params: vec![AbiParam::new(fx.pointer_type)],
                 returns: vec![AbiParam::new(types::I32)],
             },
         )
diff --git a/compiler/rustc_codegen_cranelift/src/unsize.rs b/compiler/rustc_codegen_cranelift/src/unsize.rs
index d594731b4df..d9c4647cba3 100644
--- a/compiler/rustc_codegen_cranelift/src/unsize.rs
+++ b/compiler/rustc_codegen_cranelift/src/unsize.rs
@@ -77,12 +77,10 @@ fn unsize_ptr<'tcx>(
         (&ty::Ref(_, a, _), &ty::Ref(_, b, _))
         | (&ty::Ref(_, a, _), &ty::RawPtr(ty::TypeAndMut { ty: b, .. }))
         | (&ty::RawPtr(ty::TypeAndMut { ty: a, .. }), &ty::RawPtr(ty::TypeAndMut { ty: b, .. })) => {
-            assert!(!fx.layout_of(a).is_unsized());
             (src, unsized_info(fx, a, b, old_info))
         }
         (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) if def_a.is_box() && def_b.is_box() => {
             let (a, b) = (src_layout.ty.boxed_ty(), dst_layout.ty.boxed_ty());
-            assert!(!fx.layout_of(a).is_unsized());
             (src, unsized_info(fx, a, b, old_info))
         }
         (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) => {
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index ae8ccc626b4..364b3da92b8 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -34,10 +34,10 @@ fn codegen_field<'tcx>(
                 let (_, unsized_align) =
                     crate::unsize::size_and_align_of_dst(fx, field_layout, extra);
 
-                let one = fx.bcx.ins().iconst(pointer_ty(fx.tcx), 1);
+                let one = fx.bcx.ins().iconst(fx.pointer_type, 1);
                 let align_sub_1 = fx.bcx.ins().isub(unsized_align, one);
                 let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64);
-                let zero = fx.bcx.ins().iconst(pointer_ty(fx.tcx), 0);
+                let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
                 let and_rhs = fx.bcx.ins().isub(zero, unsized_align);
                 let offset = fx.bcx.ins().band(and_lhs, and_rhs);
 
@@ -206,6 +206,38 @@ impl<'tcx> CValue<'tcx> {
         }
     }
 
+    /// Like [`CValue::value_field`] except handling ADTs containing a single array field in a way
+    /// such that you can access individual lanes.
+    pub(crate) fn value_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: u64,
+    ) -> CValue<'tcx> {
+        let layout = self.1;
+        assert!(layout.ty.is_simd());
+        let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(lane_idx < lane_count);
+        match self.0 {
+            CValueInner::ByVal(val) => match layout.abi {
+                Abi::Vector { element: _, count: _ } => {
+                    assert!(lane_count <= u8::MAX.into(), "SIMD type with more than 255 lanes???");
+                    let lane_idx = u8::try_from(lane_idx).unwrap();
+                    let lane = fx.bcx.ins().extractlane(val, lane_idx);
+                    CValue::by_val(lane, lane_layout)
+                }
+                _ => unreachable!("value_lane for ByVal with abi {:?}", layout.abi),
+            },
+            CValueInner::ByValPair(_, _) => unreachable!(),
+            CValueInner::ByRef(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CValue::by_ref(field_ptr, lane_layout)
+            }
+            CValueInner::ByRef(_, Some(_)) => unreachable!(),
+        }
+    }
+
     pub(crate) fn unsize_value(self, fx: &mut FunctionCx<'_, '_, 'tcx>, dest: CPlace<'tcx>) {
         crate::unsize::coerce_unsized_into(fx, self, dest);
     }
@@ -286,17 +318,16 @@ impl<'tcx> CPlace<'tcx> {
         &self.inner
     }
 
-    pub(crate) fn no_place(layout: TyAndLayout<'tcx>) -> CPlace<'tcx> {
-        CPlace { inner: CPlaceInner::Addr(Pointer::dangling(layout.align.pref), None), layout }
-    }
-
     pub(crate) fn new_stack_slot(
         fx: &mut FunctionCx<'_, '_, 'tcx>,
         layout: TyAndLayout<'tcx>,
     ) -> CPlace<'tcx> {
         assert!(!layout.is_unsized());
         if layout.size.bytes() == 0 {
-            return CPlace::no_place(layout);
+            return CPlace {
+                inner: CPlaceInner::Addr(Pointer::dangling(layout.align.pref), None),
+                layout,
+            };
         }
 
         let stack_slot = fx.bcx.create_stack_slot(StackSlotData {
@@ -610,6 +641,38 @@ impl<'tcx> CPlace<'tcx> {
         }
     }
 
+    /// Like [`CPlace::place_field`] except handling ADTs containing a single array field in a way
+    /// such that you can access individual lanes.
+    pub(crate) fn place_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: u64,
+    ) -> CPlace<'tcx> {
+        let layout = self.layout();
+        assert!(layout.ty.is_simd());
+        let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(lane_idx < lane_count);
+
+        match self.inner {
+            CPlaceInner::Var(local, var) => {
+                assert!(matches!(layout.abi, Abi::Vector { .. }));
+                CPlace {
+                    inner: CPlaceInner::VarLane(local, var, lane_idx.try_into().unwrap()),
+                    layout: lane_layout,
+                }
+            }
+            CPlaceInner::VarPair(_, _, _) => unreachable!(),
+            CPlaceInner::VarLane(_, _, _) => unreachable!(),
+            CPlaceInner::Addr(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CPlace::for_ptr(field_ptr, lane_layout)
+            }
+            CPlaceInner::Addr(_, Some(_)) => unreachable!(),
+        }
+    }
+
     pub(crate) fn place_index(
         self,
         fx: &mut FunctionCx<'_, '_, 'tcx>,
diff --git a/compiler/rustc_codegen_cranelift/src/vtable.rs b/compiler/rustc_codegen_cranelift/src/vtable.rs
index 1b315874308..f97d416b66f 100644
--- a/compiler/rustc_codegen_cranelift/src/vtable.rs
+++ b/compiler/rustc_codegen_cranelift/src/vtable.rs
@@ -14,7 +14,7 @@ pub(crate) fn vtable_memflags() -> MemFlags {
 pub(crate) fn drop_fn_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
     let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
     fx.bcx.ins().load(
-        pointer_ty(fx.tcx),
+        fx.pointer_type,
         vtable_memflags(),
         vtable,
         (ty::COMMON_VTABLE_ENTRIES_DROPINPLACE * usize_size) as i32,
@@ -24,7 +24,7 @@ pub(crate) fn drop_fn_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) ->
 pub(crate) fn size_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
     let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
     fx.bcx.ins().load(
-        pointer_ty(fx.tcx),
+        fx.pointer_type,
         vtable_memflags(),
         vtable,
         (ty::COMMON_VTABLE_ENTRIES_SIZE * usize_size) as i32,
@@ -34,7 +34,7 @@ pub(crate) fn size_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Val
 pub(crate) fn min_align_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
     let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
     fx.bcx.ins().load(
-        pointer_ty(fx.tcx),
+        fx.pointer_type,
         vtable_memflags(),
         vtable,
         (ty::COMMON_VTABLE_ENTRIES_ALIGN * usize_size) as i32,
@@ -55,7 +55,7 @@ pub(crate) fn get_ptr_and_method_ref<'tcx>(
 
     let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes();
     let func_ref = fx.bcx.ins().load(
-        pointer_ty(fx.tcx),
+        fx.pointer_type,
         vtable_memflags(),
         vtable,
         (idx * usize_size as usize) as i32,
diff --git a/compiler/rustc_codegen_cranelift/y.rs b/compiler/rustc_codegen_cranelift/y.rs
index 43937588b48..26605003c42 100755
--- a/compiler/rustc_codegen_cranelift/y.rs
+++ b/compiler/rustc_codegen_cranelift/y.rs
@@ -15,8 +15,8 @@ exec ${0/.rs/.bin} $@
 //! for example:
 //!
 //! ```shell
-//! $ rustc y.rs -o build/y.bin
-//! $ build/y.bin
+//! $ rustc y.rs -o y.bin
+//! $ ./y.bin
 //! ```
 //!
 //! # Naming