author     bors <bors@rust-lang.org>  2023-01-27 00:03:09 +0000
committer  bors <bors@rust-lang.org>  2023-01-27 00:03:09 +0000
commit     a2d002afe70015f621771d00c9cb4fe115f8cb11 (patch)
tree       74229ed29090d8ee41ed826ae61e610e727e2397 /compiler/rustc_codegen_cranelift/src
parent     d7948c843de94245c794e8c63dd4301a78bb5ba3 (diff)
parent     3808bc4639468018b1e5c30e1cd2e6905485ce67 (diff)
Auto merge of #107269 - bjorn3:sync_cg_clif-2023-01-24, r=bjorn3
Sync rustc_codegen_cranelift

For cg_clif itself there have been a couple of bug fixes since the last sync, a Cranelift update, and implementations of all remaining SIMD platform intrinsics used by `std::simd` (`std::arch` is still missing a lot, though). Most of the diff, however, comes from reworking the cg_clif build system.

r? `@ghost`

`@rustbot` label +A-codegen +A-cranelift +T-compiler
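
As an illustration (not part of this PR), here is a minimal nightly sketch that exercises the `simd_gather`/`simd_scatter` platform intrinsics implemented in this sync; it assumes the `portable_simd` API (`Simd::gather_select`/`Simd::scatter_select`), and the indices and values are arbitrary:

```rust
// Minimal sketch (assumes nightly `portable_simd`); the masked gather/scatter calls
// below lower to the `simd_gather`/`simd_scatter` intrinsics that cg_clif now handles.
#![feature(portable_simd)]
use std::simd::{Mask, Simd};

fn main() {
    let data = [10i32, 11, 12, 13, 14, 15, 16, 17];
    let idxs = Simd::from_array([7usize, 0, 3, 3]);
    let enable = Mask::<isize, 4>::from_array([true, true, false, true]);
    let or = Simd::splat(-1i32);

    // Gather lanes from `data` where the mask is set, falling back to `or` elsewhere.
    let gathered = Simd::gather_select(&data, enable, idxs, or);
    assert_eq!(gathered.to_array(), [17, 10, -1, 13]);

    // Scatter the gathered lanes back into a fresh buffer at the same indices.
    let mut out = [0i32; 8];
    gathered.scatter_select(&mut out, enable, idxs);
    assert_eq!(out, [10, 0, 0, 13, 0, 0, 0, 17]);
}
```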
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/mod.rs  37
-rw-r--r--  compiler/rustc_codegen_cranelift/src/base.rs  42
-rw-r--r--  compiler/rustc_codegen_cranelift/src/common.rs  9
-rw-r--r--  compiler/rustc_codegen_cranelift/src/cranelift_native.rs  248
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs  14
-rw-r--r--  compiler/rustc_codegen_cranelift/src/driver/aot.rs  18
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs  4
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs  11
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs  122
-rw-r--r--  compiler/rustc_codegen_cranelift/src/lib.rs  8
-rw-r--r--  compiler/rustc_codegen_cranelift/src/main_shim.rs  15
-rw-r--r--  compiler/rustc_codegen_cranelift/src/optimize/peephole.rs  4
-rw-r--r--  compiler/rustc_codegen_cranelift/src/value_and_place.rs  4
13 files changed, 484 insertions, 52 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
index 65cc6b43767..3c34585d419 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -7,6 +7,7 @@ mod returning;
 use cranelift_module::ModuleError;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::ty::layout::FnAbiOf;
+use rustc_session::Session;
 use rustc_target::abi::call::{Conv, FnAbi};
 use rustc_target::spec::abi::Abi;
 
@@ -22,7 +23,7 @@ fn clif_sig_from_fn_abi<'tcx>(
     default_call_conv: CallConv,
     fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
 ) -> Signature {
-    let call_conv = conv_to_call_conv(fn_abi.conv, default_call_conv);
+    let call_conv = conv_to_call_conv(tcx.sess, fn_abi.conv, default_call_conv);
 
     let inputs = fn_abi.args.iter().map(|arg_abi| arg_abi.get_abi_param(tcx).into_iter()).flatten();
 
@@ -33,24 +34,32 @@ fn clif_sig_from_fn_abi<'tcx>(
     Signature { params, returns, call_conv }
 }
 
-pub(crate) fn conv_to_call_conv(c: Conv, default_call_conv: CallConv) -> CallConv {
+pub(crate) fn conv_to_call_conv(sess: &Session, c: Conv, default_call_conv: CallConv) -> CallConv {
     match c {
         Conv::Rust | Conv::C => default_call_conv,
         Conv::RustCold => CallConv::Cold,
         Conv::X86_64SysV => CallConv::SystemV,
         Conv::X86_64Win64 => CallConv::WindowsFastcall,
-        Conv::ArmAapcs
-        | Conv::CCmseNonSecureCall
-        | Conv::Msp430Intr
+
+        // Should already get a back compat warning
+        Conv::X86Fastcall | Conv::X86Stdcall | Conv::X86ThisCall | Conv::X86VectorCall => {
+            default_call_conv
+        }
+
+        Conv::X86Intr => sess.fatal("x86-interrupt call conv not yet implemented"),
+
+        Conv::ArmAapcs => sess.fatal("aapcs call conv not yet implemented"),
+        Conv::CCmseNonSecureCall => {
+            sess.fatal("C-cmse-nonsecure-call call conv is not yet implemented");
+        }
+
+        Conv::Msp430Intr
         | Conv::PtxKernel
-        | Conv::X86Fastcall
-        | Conv::X86Intr
-        | Conv::X86Stdcall
-        | Conv::X86ThisCall
-        | Conv::X86VectorCall
         | Conv::AmdGpuKernel
         | Conv::AvrInterrupt
-        | Conv::AvrNonBlockingInterrupt => todo!("{:?}", c),
+        | Conv::AvrNonBlockingInterrupt => {
+            unreachable!("tried to use {c:?} call conv which only exists on an unsupported target");
+        }
     }
 }
 
@@ -161,6 +170,12 @@ fn make_local_place<'tcx>(
     layout: TyAndLayout<'tcx>,
     is_ssa: bool,
 ) -> CPlace<'tcx> {
+    if layout.is_unsized() {
+        fx.tcx.sess.span_fatal(
+            fx.mir.local_decls[local].source_info.span,
+            "unsized locals are not yet supported",
+        );
+    }
     let place = if is_ssa {
         if let rustc_target::abi::Abi::ScalarPair(_, _) = layout.abi {
             CPlace::new_var_pair(fx, local, layout)
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index 89d955e8bf2..d3a8c10657e 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -113,6 +113,8 @@ pub(crate) fn codegen_fn<'tcx>(
     };
 
     tcx.sess.time("codegen clif ir", || codegen_fn_body(&mut fx, start_block));
+    fx.bcx.seal_all_blocks();
+    fx.bcx.finalize();
 
     // Recover all necessary data from fx, before accessing func will prevent future access to it.
     let symbol_name = fx.symbol_name;
@@ -303,6 +305,9 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
         let source_info = bb_data.terminator().source_info;
         fx.set_debug_loc(source_info);
 
+        let _print_guard =
+            crate::PrintOnPanic(|| format!("terminator {:?}", bb_data.terminator().kind));
+
         match &bb_data.terminator().kind {
             TerminatorKind::Goto { target } => {
                 if let TerminatorKind::Return = fx.mir[*target].terminator().kind {
@@ -464,7 +469,10 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
                     *destination,
                 );
             }
-            TerminatorKind::Resume | TerminatorKind::Abort => {
+            TerminatorKind::Abort => {
+                codegen_panic_cannot_unwind(fx, source_info);
+            }
+            TerminatorKind::Resume => {
                 // FIXME implement unwinding
                 fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
             }
@@ -487,9 +495,6 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
             }
         };
     }
-
-    fx.bcx.seal_all_blocks();
-    fx.bcx.finalize();
 }
 
 fn codegen_stmt<'tcx>(
@@ -932,7 +937,28 @@ pub(crate) fn codegen_panic<'tcx>(
     codegen_panic_inner(fx, rustc_hir::LangItem::Panic, &args, source_info.span);
 }
 
-pub(crate) fn codegen_panic_inner<'tcx>(
+pub(crate) fn codegen_panic_nounwind<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    msg_str: &str,
+    source_info: mir::SourceInfo,
+) {
+    let msg_ptr = fx.anonymous_str(msg_str);
+    let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap());
+    let args = [msg_ptr, msg_len];
+
+    codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, source_info.span);
+}
+
+pub(crate) fn codegen_panic_cannot_unwind<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    source_info: mir::SourceInfo,
+) {
+    let args = [];
+
+    codegen_panic_inner(fx, rustc_hir::LangItem::PanicCannotUnwind, &args, source_info.span);
+}
+
+fn codegen_panic_inner<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     lang_item: rustc_hir::LangItem,
     args: &[Value],
@@ -949,11 +975,7 @@ pub(crate) fn codegen_panic_inner<'tcx>(
 
     fx.lib_call(
         &*symbol_name,
-        vec![
-            AbiParam::new(fx.pointer_type),
-            AbiParam::new(fx.pointer_type),
-            AbiParam::new(fx.pointer_type),
-        ],
+        args.iter().map(|&arg| AbiParam::new(fx.bcx.func.dfg.value_type(arg))).collect(),
         vec![],
         args,
     );
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
index 63ed10cdfcc..f41af3a9e63 100644
--- a/compiler/rustc_codegen_cranelift/src/common.rs
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -168,6 +168,15 @@ pub(crate) fn codegen_icmp_imm(
     }
 }
 
+pub(crate) fn codegen_bitcast(fx: &mut FunctionCx<'_, '_, '_>, dst_ty: Type, val: Value) -> Value {
+    let mut flags = MemFlags::new();
+    flags.set_endianness(match fx.tcx.data_layout.endian {
+        rustc_target::abi::Endian::Big => cranelift_codegen::ir::Endianness::Big,
+        rustc_target::abi::Endian::Little => cranelift_codegen::ir::Endianness::Little,
+    });
+    fx.bcx.ins().bitcast(dst_ty, flags, val)
+}
+
 pub(crate) fn type_zero_value(bcx: &mut FunctionBuilder<'_>, ty: Type) -> Value {
     if ty == types::I128 {
         let zero = bcx.ins().iconst(types::I64, 0);
diff --git a/compiler/rustc_codegen_cranelift/src/cranelift_native.rs b/compiler/rustc_codegen_cranelift/src/cranelift_native.rs
new file mode 100644
index 00000000000..6c4efca4424
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/cranelift_native.rs
@@ -0,0 +1,248 @@
+// Vendored from https://github.com/bytecodealliance/wasmtime/blob/b58a197d33f044193c3d608010f5e6ec394ac07e/cranelift/native/src/lib.rs
+// which is licensed as
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// unlike rustc_codegen_cranelift itself. Also applies a small change to remove #![cfg_attr] that
+// rust's CI complains about and to fix formatting to match rustc.
+// FIXME revert back to the external crate with Cranelift 0.93
+#![allow(warnings)]
+
+//! Performs autodetection of the host for the purposes of running
+//! Cranelift to generate code to run on the same machine.
+
+#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates, unstable_features)]
+#![warn(unused_import_braces)]
+
+use cranelift_codegen::isa;
+use target_lexicon::Triple;
+
+/// Return an `isa` builder configured for the current host
+/// machine, or `Err(())` if the host machine is not supported
+/// in the current configuration.
+pub fn builder() -> Result<isa::Builder, &'static str> {
+    builder_with_options(true)
+}
+
+/// Return an `isa` builder configured for the current host
+/// machine, or `Err(())` if the host machine is not supported
+/// in the current configuration.
+///
+/// Selects the given backend variant specifically; this is
+/// useful when more than one backend exists for a given target
+/// (e.g., on x86-64).
+pub fn builder_with_options(infer_native_flags: bool) -> Result<isa::Builder, &'static str> {
+    let mut isa_builder = isa::lookup(Triple::host()).map_err(|err| match err {
+        isa::LookupError::SupportDisabled => "support for architecture disabled at compile time",
+        isa::LookupError::Unsupported => "unsupported architecture",
+    })?;
+
+    #[cfg(target_arch = "x86_64")]
+    {
+        use cranelift_codegen::settings::Configurable;
+
+        if !std::is_x86_feature_detected!("sse2") {
+            return Err("x86 support requires SSE2");
+        }
+
+        if !infer_native_flags {
+            return Ok(isa_builder);
+        }
+
+        // These are temporarily enabled by default (see #3810 for
+        // more) so that a default-constructed `Flags` can work with
+        // default Wasmtime features. Otherwise, the user must
+        // explicitly use native flags or turn these on when on x86-64
+        // platforms to avoid a configuration panic. In order for the
+        // "enable if detected" logic below to work, we must turn them
+        // *off* (differing from the default) and then re-enable below
+        // if present.
+        isa_builder.set("has_sse3", "false").unwrap();
+        isa_builder.set("has_ssse3", "false").unwrap();
+        isa_builder.set("has_sse41", "false").unwrap();
+        isa_builder.set("has_sse42", "false").unwrap();
+
+        if std::is_x86_feature_detected!("sse3") {
+            isa_builder.enable("has_sse3").unwrap();
+        }
+        if std::is_x86_feature_detected!("ssse3") {
+            isa_builder.enable("has_ssse3").unwrap();
+        }
+        if std::is_x86_feature_detected!("sse4.1") {
+            isa_builder.enable("has_sse41").unwrap();
+        }
+        if std::is_x86_feature_detected!("sse4.2") {
+            isa_builder.enable("has_sse42").unwrap();
+        }
+        if std::is_x86_feature_detected!("popcnt") {
+            isa_builder.enable("has_popcnt").unwrap();
+        }
+        if std::is_x86_feature_detected!("avx") {
+            isa_builder.enable("has_avx").unwrap();
+        }
+        if std::is_x86_feature_detected!("avx2") {
+            isa_builder.enable("has_avx2").unwrap();
+        }
+        if std::is_x86_feature_detected!("fma") {
+            isa_builder.enable("has_fma").unwrap();
+        }
+        if std::is_x86_feature_detected!("bmi1") {
+            isa_builder.enable("has_bmi1").unwrap();
+        }
+        if std::is_x86_feature_detected!("bmi2") {
+            isa_builder.enable("has_bmi2").unwrap();
+        }
+        if std::is_x86_feature_detected!("avx512bitalg") {
+            isa_builder.enable("has_avx512bitalg").unwrap();
+        }
+        if std::is_x86_feature_detected!("avx512dq") {
+            isa_builder.enable("has_avx512dq").unwrap();
+        }
+        if std::is_x86_feature_detected!("avx512f") {
+            isa_builder.enable("has_avx512f").unwrap();
+        }
+        if std::is_x86_feature_detected!("avx512vl") {
+            isa_builder.enable("has_avx512vl").unwrap();
+        }
+        if std::is_x86_feature_detected!("avx512vbmi") {
+            isa_builder.enable("has_avx512vbmi").unwrap();
+        }
+        if std::is_x86_feature_detected!("lzcnt") {
+            isa_builder.enable("has_lzcnt").unwrap();
+        }
+    }
+
+    #[cfg(target_arch = "aarch64")]
+    {
+        use cranelift_codegen::settings::Configurable;
+
+        if !infer_native_flags {
+            return Ok(isa_builder);
+        }
+
+        if std::arch::is_aarch64_feature_detected!("lse") {
+            isa_builder.enable("has_lse").unwrap();
+        }
+
+        if std::arch::is_aarch64_feature_detected!("paca") {
+            isa_builder.enable("has_pauth").unwrap();
+        }
+
+        if cfg!(target_os = "macos") {
+            // Pointer authentication is always available on Apple Silicon.
+            isa_builder.enable("sign_return_address").unwrap();
+            // macOS enforces the use of the B key for return addresses.
+            isa_builder.enable("sign_return_address_with_bkey").unwrap();
+        }
+    }
+
+    // There is no is_s390x_feature_detected macro yet, so for now
+    // we use getauxval from the libc crate directly.
+    #[cfg(all(target_arch = "s390x", target_os = "linux"))]
+    {
+        use cranelift_codegen::settings::Configurable;
+
+        if !infer_native_flags {
+            return Ok(isa_builder);
+        }
+
+        let v = unsafe { libc::getauxval(libc::AT_HWCAP) };
+        const HWCAP_S390X_VXRS_EXT2: libc::c_ulong = 32768;
+        if (v & HWCAP_S390X_VXRS_EXT2) != 0 {
+            isa_builder.enable("has_vxrs_ext2").unwrap();
+            // There is no separate HWCAP bit for mie2, so assume
+            // that any machine with vxrs_ext2 also has mie2.
+            isa_builder.enable("has_mie2").unwrap();
+        }
+    }
+
+    // `is_riscv_feature_detected` is nightly only for now, use
+    // getauxval from the libc crate directly as a temporary measure.
+    #[cfg(all(target_arch = "riscv64", target_os = "linux"))]
+    {
+        use cranelift_codegen::settings::Configurable;
+
+        if !infer_native_flags {
+            return Ok(isa_builder);
+        }
+
+        let v = unsafe { libc::getauxval(libc::AT_HWCAP) };
+
+        const HWCAP_RISCV_EXT_A: libc::c_ulong = 1 << (b'a' - b'a');
+        const HWCAP_RISCV_EXT_C: libc::c_ulong = 1 << (b'c' - b'a');
+        const HWCAP_RISCV_EXT_D: libc::c_ulong = 1 << (b'd' - b'a');
+        const HWCAP_RISCV_EXT_F: libc::c_ulong = 1 << (b'f' - b'a');
+        const HWCAP_RISCV_EXT_M: libc::c_ulong = 1 << (b'm' - b'a');
+        const HWCAP_RISCV_EXT_V: libc::c_ulong = 1 << (b'v' - b'a');
+
+        if (v & HWCAP_RISCV_EXT_A) != 0 {
+            isa_builder.enable("has_a").unwrap();
+        }
+
+        if (v & HWCAP_RISCV_EXT_C) != 0 {
+            isa_builder.enable("has_c").unwrap();
+        }
+
+        if (v & HWCAP_RISCV_EXT_D) != 0 {
+            isa_builder.enable("has_d").unwrap();
+        }
+
+        if (v & HWCAP_RISCV_EXT_F) != 0 {
+            isa_builder.enable("has_f").unwrap();
+
+            // TODO: There doesn't seem to be a bit associated with this extension
+            // rust enables it with the `f` extension:
+            // https://github.com/rust-lang/stdarch/blob/790411f93c4b5eada3c23abb4c9a063fb0b24d99/crates/std_detect/src/detect/os/linux/riscv.rs#L43
+            isa_builder.enable("has_zicsr").unwrap();
+        }
+
+        if (v & HWCAP_RISCV_EXT_M) != 0 {
+            isa_builder.enable("has_m").unwrap();
+        }
+
+        if (v & HWCAP_RISCV_EXT_V) != 0 {
+            isa_builder.enable("has_v").unwrap();
+        }
+
+        // TODO: ZiFencei does not have a bit associated with it
+        // TODO: Zbkb does not have a bit associated with it
+    }
+
+    // squelch warnings about unused mut/variables on some platforms.
+    drop(&mut isa_builder);
+    drop(infer_native_flags);
+
+    Ok(isa_builder)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::builder;
+    use cranelift_codegen::isa::CallConv;
+    use cranelift_codegen::settings;
+
+    #[test]
+    fn test() {
+        if let Ok(isa_builder) = builder() {
+            let flag_builder = settings::builder();
+            let isa = isa_builder.finish(settings::Flags::new(flag_builder)).unwrap();
+
+            if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
+                assert_eq!(isa.default_call_conv(), CallConv::AppleAarch64);
+            } else if cfg!(any(unix, target_os = "nebulet")) {
+                assert_eq!(isa.default_call_conv(), CallConv::SystemV);
+            } else if cfg!(windows) {
+                assert_eq!(isa.default_call_conv(), CallConv::WindowsFastcall);
+            }
+
+            if cfg!(target_pointer_width = "64") {
+                assert_eq!(isa.pointer_bits(), 64);
+            } else if cfg!(target_pointer_width = "32") {
+                assert_eq!(isa.pointer_bits(), 32);
+            } else if cfg!(target_pointer_width = "16") {
+                assert_eq!(isa.pointer_bits(), 16);
+            }
+        }
+    }
+}
+
+/// Version number of this crate.
+pub const VERSION: &str = env!("CARGO_PKG_VERSION");
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
index 28fbcb15b2b..3a7421d8b30 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
@@ -20,6 +20,14 @@ use indexmap::IndexSet;
 pub(crate) use emit::{DebugReloc, DebugRelocName};
 pub(crate) use unwind::UnwindContext;
 
+pub(crate) fn producer() -> String {
+    format!(
+        "cg_clif (rustc {}, cranelift {})",
+        rustc_interface::util::rustc_version_str().unwrap_or("unknown version"),
+        cranelift_codegen::VERSION,
+    )
+}
+
 pub(crate) struct DebugContext {
     endian: RunTimeEndian,
 
@@ -57,11 +65,7 @@ impl DebugContext {
 
         let mut dwarf = DwarfUnit::new(encoding);
 
-        let producer = format!(
-            "cg_clif (rustc {}, cranelift {})",
-            rustc_interface::util::rustc_version_str().unwrap_or("unknown version"),
-            cranelift_codegen::VERSION,
-        );
+        let producer = producer();
         let comp_dir = tcx
             .sess
             .opts
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index f873561c171..d4494a9e45d 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -108,6 +108,8 @@ impl OngoingCodegen {
 
         self.concurrency_limiter.finished();
 
+        sess.abort_if_errors();
+
         (
             CodegenResults {
                 modules,
@@ -169,10 +171,22 @@ fn emit_cgu(
 fn emit_module(
     output_filenames: &OutputFilenames,
     prof: &SelfProfilerRef,
-    object: cranelift_object::object::write::Object<'_>,
+    mut object: cranelift_object::object::write::Object<'_>,
     kind: ModuleKind,
     name: String,
 ) -> Result<CompiledModule, String> {
+    if object.format() == cranelift_object::object::BinaryFormat::Elf {
+        let comment_section = object.add_section(
+            Vec::new(),
+            b".comment".to_vec(),
+            cranelift_object::object::SectionKind::OtherString,
+        );
+        let mut producer = vec![0];
+        producer.extend(crate::debuginfo::producer().as_bytes());
+        producer.push(0);
+        object.set_section_data(comment_section, producer, 1);
+    }
+
     let tmp_file = output_filenames.temp_path(OutputType::Object, Some(&name));
     let mut file = match File::create(&tmp_file) {
         Ok(file) => file,
@@ -399,8 +413,6 @@ pub(crate) fn run_aot(
             .collect::<Vec<_>>()
     });
 
-    tcx.sess.abort_if_errors();
-
     let mut allocator_module = make_module(tcx.sess, &backend_config, "allocator_shim".to_string());
     let mut allocator_unwind_context = UnwindContext::new(allocator_module.isa(), true);
     let created_alloc_shim =
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
index 7bc161fbe55..d2ae6978ca2 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -33,8 +33,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
 
                 // cast float to int
                 let a_lane = match lane_ty {
-                    types::F32 => fx.bcx.ins().bitcast(types::I32, a_lane),
-                    types::F64 => fx.bcx.ins().bitcast(types::I64, a_lane),
+                    types::F32 => codegen_bitcast(fx, types::I32, a_lane),
+                    types::F64 => codegen_bitcast(fx, types::I64, a_lane),
                     _ => a_lane,
                 };
 
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index b1adaa193b3..d561cf139b6 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -201,7 +201,7 @@ fn bool_to_zero_or_max_uint<'tcx>(
     let mut res = fx.bcx.ins().bmask(int_ty, val);
 
     if ty.is_float() {
-        res = fx.bcx.ins().bitcast(ty, res);
+        res = codegen_bitcast(fx, ty, res);
     }
 
     res
@@ -241,10 +241,9 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             substs,
             args,
             destination,
+            target,
             source_info.span,
         );
-        let ret_block = fx.get_block(target);
-        fx.bcx.ins().jump(ret_block, &[]);
     } else if codegen_float_intrinsic_call(fx, intrinsic, args, destination) {
         let ret_block = fx.get_block(target);
         fx.bcx.ins().jump(ret_block, &[]);
@@ -651,7 +650,7 @@ fn codegen_regular_intrinsic_call<'tcx>(
             let layout = fx.layout_of(substs.type_at(0));
             if layout.abi.is_uninhabited() {
                 with_no_trimmed_paths!({
-                    crate::base::codegen_panic(
+                    crate::base::codegen_panic_nounwind(
                         fx,
                         &format!("attempted to instantiate uninhabited type `{}`", layout.ty),
                         source_info,
@@ -664,7 +663,7 @@ fn codegen_regular_intrinsic_call<'tcx>(
                 && !fx.tcx.permits_zero_init(fx.param_env().and(layout))
             {
                 with_no_trimmed_paths!({
-                    crate::base::codegen_panic(
+                    crate::base::codegen_panic_nounwind(
                         fx,
                         &format!(
                             "attempted to zero-initialize type `{}`, which is invalid",
@@ -680,7 +679,7 @@ fn codegen_regular_intrinsic_call<'tcx>(
                 && !fx.tcx.permits_uninit_init(fx.param_env().and(layout))
             {
                 with_no_trimmed_paths!({
-                    crate::base::codegen_panic(
+                    crate::base::codegen_panic_nounwind(
                         fx,
                         &format!(
                             "attempted to leave type `{}` uninitialized, which is invalid",
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index 14f5e918739..b33eb29754a 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -24,6 +24,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
     _substs: SubstsRef<'tcx>,
     args: &[mir::Operand<'tcx>],
     ret: CPlace<'tcx>,
+    target: BasicBlock,
     span: Span,
 ) {
     match intrinsic {
@@ -277,16 +278,15 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             } else {
                 fx.tcx.sess.span_warn(span, "Index argument for `simd_extract` is not a constant");
                 let trap_block = fx.bcx.create_block();
-                let dummy_block = fx.bcx.create_block();
                 let true_ = fx.bcx.ins().iconst(types::I8, 1);
                 fx.bcx.ins().brnz(true_, trap_block, &[]);
-                fx.bcx.ins().jump(dummy_block, &[]);
+                let ret_block = fx.get_block(target);
+                fx.bcx.ins().jump(ret_block, &[]);
                 fx.bcx.switch_to_block(trap_block);
                 crate::trap::trap_unimplemented(
                     fx,
                     "Index argument for `simd_extract` is not a constant",
                 );
-                fx.bcx.switch_to_block(dummy_block);
                 return;
             };
 
@@ -770,11 +770,119 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             });
         }
 
-        // simd_arith_offset
-        // simd_scatter
-        // simd_gather
+        sym::simd_expose_addr | sym::simd_from_exposed_addr | sym::simd_cast_ptr => {
+            intrinsic_args!(fx, args => (arg); intrinsic);
+            ret.write_cvalue_transmute(fx, arg);
+        }
+
+        sym::simd_arith_offset => {
+            intrinsic_args!(fx, args => (ptr, offset); intrinsic);
+
+            let (lane_count, ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
+            let pointee_ty = ptr_lane_ty.builtin_deref(true).unwrap().ty;
+            let pointee_size = fx.layout_of(pointee_ty).size.bytes();
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+            assert_eq!(lane_count, ret_lane_count);
+
+            for lane_idx in 0..lane_count {
+                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
+                let offset_lane = offset.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let ptr_diff = if pointee_size != 1 {
+                    fx.bcx.ins().imul_imm(offset_lane, pointee_size as i64)
+                } else {
+                    offset_lane
+                };
+                let res_lane = fx.bcx.ins().iadd(ptr_lane, ptr_diff);
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        sym::simd_gather => {
+            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+
+            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, ptr_lane_count);
+            assert_eq!(val_lane_count, mask_lane_count);
+            assert_eq!(val_lane_count, ret_lane_count);
+
+            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+            for lane_idx in 0..ptr_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let if_disabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+                fx.bcx.ins().brnz(mask_lane, if_enabled, &[]);
+                fx.bcx.ins().jump(if_disabled, &[]);
+                fx.bcx.seal_block(if_enabled);
+                fx.bcx.seal_block(if_disabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), ptr_lane, 0);
+                fx.bcx.ins().jump(next, &[res]);
+
+                fx.bcx.switch_to_block(if_disabled);
+                fx.bcx.ins().jump(next, &[val_lane]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+
+                fx.bcx.ins().nop();
+
+                ret.place_lane(fx, lane_idx)
+                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+            }
+        }
+
+        sym::simd_scatter => {
+            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+
+            let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, ptr_lane_count);
+            assert_eq!(val_lane_count, mask_lane_count);
+
+            for lane_idx in 0..ptr_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+
+                fx.bcx.ins().brnz(mask_lane, if_enabled, &[]);
+                fx.bcx.ins().jump(next, &[]);
+                fx.bcx.seal_block(if_enabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                fx.bcx.ins().store(MemFlags::trusted(), val_lane, ptr_lane, 0);
+                fx.bcx.ins().jump(next, &[]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+            }
+        }
+
         _ => {
-            fx.tcx.sess.span_fatal(span, &format!("Unknown SIMD intrinsic {}", intrinsic));
+            fx.tcx.sess.span_err(span, &format!("Unknown SIMD intrinsic {}", intrinsic));
+            // Prevent verifier error
+            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
         }
     }
+    let ret_block = fx.get_block(target);
+    fx.bcx.ins().jump(ret_block, &[]);
 }
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
index 629d79d5012..d3868730557 100644
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -57,6 +57,8 @@ mod compiler_builtins;
 mod concurrency_limiter;
 mod config;
 mod constant;
+// FIXME revert back to the external crate with Cranelift 0.93
+mod cranelift_native;
 mod debuginfo;
 mod discriminant;
 mod driver;
@@ -278,12 +280,14 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Box<dyn isa::Tar
         }
     }
 
-    if target_triple.architecture == target_lexicon::Architecture::X86_64 {
+    if let target_lexicon::Architecture::Aarch64(_) | target_lexicon::Architecture::X86_64 =
+        target_triple.architecture
+    {
         // Windows depends on stack probes to grow the committed part of the stack
         flags_builder.enable("enable_probestack").unwrap();
         flags_builder.set("probestack_strategy", "inline").unwrap();
     } else {
-        // __cranelift_probestack is not provided and inline stack probes are only supported on x86_64
+        // __cranelift_probestack is not provided and inline stack probes are only supported on AArch64 and x86_64
         flags_builder.set("enable_probestack", "false").unwrap();
     }
 
diff --git a/compiler/rustc_codegen_cranelift/src/main_shim.rs b/compiler/rustc_codegen_cranelift/src/main_shim.rs
index c10054e7f0d..fd45362548c 100644
--- a/compiler/rustc_codegen_cranelift/src/main_shim.rs
+++ b/compiler/rustc_codegen_cranelift/src/main_shim.rs
@@ -64,13 +64,20 @@ pub(crate) fn maybe_create_entry_wrapper(
             ],
             returns: vec![AbiParam::new(m.target_config().pointer_type() /*isize*/)],
             call_conv: crate::conv_to_call_conv(
+                tcx.sess,
                 tcx.sess.target.options.entry_abi,
                 m.target_config().default_call_conv,
             ),
         };
 
         let entry_name = tcx.sess.target.options.entry_name.as_ref();
-        let cmain_func_id = m.declare_function(entry_name, Linkage::Export, &cmain_sig).unwrap();
+        let cmain_func_id = match m.declare_function(entry_name, Linkage::Export, &cmain_sig) {
+            Ok(func_id) => func_id,
+            Err(err) => {
+                tcx.sess
+                    .fatal(&format!("entry symbol `{entry_name}` declared multiple times: {err}"));
+            }
+        };
 
         let instance = Instance::mono(tcx, rust_main_def_id).polymorphize(tcx);
 
@@ -162,7 +169,11 @@ pub(crate) fn maybe_create_entry_wrapper(
             bcx.seal_all_blocks();
             bcx.finalize();
         }
-        m.define_function(cmain_func_id, &mut ctx).unwrap();
+
+        if let Err(err) = m.define_function(cmain_func_id, &mut ctx) {
+            tcx.sess.fatal(&format!("entry symbol `{entry_name}` defined multiple times: {err}"));
+        }
+
         unwind_context.add_function(cmain_func_id, &ctx, m.isa());
     }
 }
diff --git a/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
index 7f45bbd8f28..26327dca299 100644
--- a/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
+++ b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
@@ -7,7 +7,7 @@ use cranelift_frontend::FunctionBuilder;
 /// otherwise return the given value and false.
 pub(crate) fn maybe_unwrap_bool_not(bcx: &mut FunctionBuilder<'_>, arg: Value) -> (Value, bool) {
     if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
-        match bcx.func.dfg[arg_inst] {
+        match bcx.func.dfg.insts[arg_inst] {
             // This is the lowering of `Rvalue::Not`
             InstructionData::IntCompareImm {
                 opcode: Opcode::IcmpImm,
@@ -34,7 +34,7 @@ pub(crate) fn maybe_known_branch_taken(
         return None;
     };
 
-    match bcx.func.dfg[arg_inst] {
+    match bcx.func.dfg.insts[arg_inst] {
         InstructionData::UnaryImm { opcode: Opcode::Iconst, imm } => {
             if test_zero {
                 Some(imm.bits() == 0)
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index fe8af21ac6d..fa06d6c3ba7 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -514,8 +514,8 @@ impl<'tcx> CPlace<'tcx> {
                 (types::I32, types::F32)
                 | (types::F32, types::I32)
                 | (types::I64, types::F64)
-                | (types::F64, types::I64) => fx.bcx.ins().bitcast(dst_ty, data),
-                _ if src_ty.is_vector() && dst_ty.is_vector() => fx.bcx.ins().bitcast(dst_ty, data),
+                | (types::F64, types::I64) => codegen_bitcast(fx, dst_ty, data),
+                _ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data),
                 _ if src_ty.is_vector() || dst_ty.is_vector() => {
                     // FIXME do something more efficient for transmutes between vectors and integers.
                     let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {