about summary refs log tree commit diff
path: root/compiler/rustc_codegen_cranelift/src
diff options
context:
space:
mode:
authorbjorn3 <17426603+bjorn3@users.noreply.github.com>2025-03-30 15:43:48 +0000
committerbjorn3 <17426603+bjorn3@users.noreply.github.com>2025-03-30 15:43:48 +0000
commit1111a9788650b6cde6f78250a81328f6ab211b51 (patch)
tree8da5f804e24c7309bca11e50855c5a7d1c9c3926 /compiler/rustc_codegen_cranelift/src
parent45b40a75966b36d3588f173441896fddad01cd80 (diff)
parentba315abda789c9f59f2100102232bddb30b0d3d3 (diff)
downloadrust-1111a9788650b6cde6f78250a81328f6ab211b51.tar.gz
rust-1111a9788650b6cde6f78250a81328f6ab211b51.zip
Merge commit 'ba315abda789c9f59f2100102232bddb30b0d3d3' into sync_cg_clif-2025-03-30
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
-rw-r--r--compiler/rustc_codegen_cranelift/src/base.rs6
-rw-r--r--compiler/rustc_codegen_cranelift/src/common.rs18
-rw-r--r--compiler/rustc_codegen_cranelift/src/config.rs34
-rw-r--r--compiler/rustc_codegen_cranelift/src/debuginfo/types.rs4
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/aot.rs2
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/jit.rs225
-rw-r--r--compiler/rustc_codegen_cranelift/src/inline_asm.rs48
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs8
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs123
-rw-r--r--compiler/rustc_codegen_cranelift/src/lib.rs26
-rw-r--r--compiler/rustc_codegen_cranelift/src/num.rs8
-rw-r--r--compiler/rustc_codegen_cranelift/src/value_and_place.rs6
12 files changed, 230 insertions, 278 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index 125a9201831..adaa754491e 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -729,8 +729,10 @@ fn codegen_stmt<'tcx>(
                     let to_ty = fx.monomorphize(to_ty);
 
                     fn is_wide_ptr<'tcx>(fx: &FunctionCx<'_, '_, 'tcx>, ty: Ty<'tcx>) -> bool {
-                        ty.builtin_deref(true)
-                            .is_some_and(|pointee_ty| has_ptr_meta(fx.tcx, pointee_ty))
+                        ty.builtin_deref(true).is_some_and(|pointee_ty| {
+                            fx.tcx
+                                .type_has_metadata(pointee_ty, ty::TypingEnv::fully_monomorphized())
+                        })
                     }
 
                     if is_wide_ptr(fx, from_ty) {
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
index 766278d8718..abe2972ba0c 100644
--- a/compiler/rustc_codegen_cranelift/src/common.rs
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -71,7 +71,7 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<types::Typ
         },
         ty::FnPtr(..) => pointer_ty(tcx),
         ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => {
-            if has_ptr_meta(tcx, *pointee_ty) {
+            if tcx.type_has_metadata(*pointee_ty, ty::TypingEnv::fully_monomorphized()) {
                 return None;
             } else {
                 pointer_ty(tcx)
@@ -91,7 +91,7 @@ fn clif_pair_type_from_ty<'tcx>(
             (clif_type_from_ty(tcx, types[0])?, clif_type_from_ty(tcx, types[1])?)
         }
         ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => {
-            if has_ptr_meta(tcx, *pointee_ty) {
+            if tcx.type_has_metadata(*pointee_ty, ty::TypingEnv::fully_monomorphized()) {
                 (pointer_ty(tcx), pointer_ty(tcx))
             } else {
                 return None;
@@ -101,20 +101,6 @@ fn clif_pair_type_from_ty<'tcx>(
     })
 }
 
-/// Is a pointer to this type a wide ptr?
-pub(crate) fn has_ptr_meta<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> bool {
-    if ty.is_sized(tcx, ty::TypingEnv::fully_monomorphized()) {
-        return false;
-    }
-
-    let tail = tcx.struct_tail_for_codegen(ty, ty::TypingEnv::fully_monomorphized());
-    match tail.kind() {
-        ty::Foreign(..) => false,
-        ty::Str | ty::Slice(..) | ty::Dynamic(..) => true,
-        _ => bug!("unexpected unsized tail: {:?}", tail),
-    }
-}
-
 pub(crate) fn codegen_icmp_imm(
     fx: &mut FunctionCx<'_, '_, '_>,
     intcc: IntCC,
diff --git a/compiler/rustc_codegen_cranelift/src/config.rs b/compiler/rustc_codegen_cranelift/src/config.rs
index d784f6e9d9e..d328b33a704 100644
--- a/compiler/rustc_codegen_cranelift/src/config.rs
+++ b/compiler/rustc_codegen_cranelift/src/config.rs
@@ -1,21 +1,10 @@
-/// The mode to use for compilation.
-#[derive(Copy, Clone, Debug)]
-pub enum CodegenMode {
-    /// AOT compile the crate. This is the default.
-    Aot,
-    /// JIT compile and execute the crate.
-    Jit,
-    /// JIT compile and execute the crate, but only compile functions the first time they are used.
-    JitLazy,
-}
-
 /// Configuration of cg_clif as passed in through `-Cllvm-args` and various env vars.
 #[derive(Clone, Debug)]
 pub struct BackendConfig {
     /// Should the crate be AOT compiled or JIT executed.
     ///
-    /// Defaults to AOT compilation. Can be set using `-Cllvm-args=mode=...`.
-    pub codegen_mode: CodegenMode,
+    /// Defaults to AOT compilation. Can be set using `-Cllvm-args=jit-mode`.
+    pub jit_mode: bool,
 
     /// When JIT mode is enable pass these arguments to the program.
     ///
@@ -27,7 +16,7 @@ impl BackendConfig {
     /// Parse the configuration passed in using `-Cllvm-args`.
     pub fn from_opts(opts: &[String]) -> Result<Self, String> {
         let mut config = BackendConfig {
-            codegen_mode: CodegenMode::Aot,
+            jit_mode: false,
             jit_args: match std::env::var("CG_CLIF_JIT_ARGS") {
                 Ok(args) => args.split(' ').map(|arg| arg.to_string()).collect(),
                 Err(std::env::VarError::NotPresent) => vec![],
@@ -43,20 +32,9 @@ impl BackendConfig {
                 // testing cg_clif.
                 continue;
             }
-            if let Some((name, value)) = opt.split_once('=') {
-                match name {
-                    "mode" => {
-                        config.codegen_mode = match value {
-                            "aot" => CodegenMode::Aot,
-                            "jit" => CodegenMode::Jit,
-                            "jit-lazy" => CodegenMode::JitLazy,
-                            _ => return Err(format!("Unknown codegen mode `{}`", value)),
-                        };
-                    }
-                    _ => return Err(format!("Unknown option `{}`", name)),
-                }
-            } else {
-                return Err(format!("Invalid option `{}`", opt));
+            match &**opt {
+                "jit-mode" => config.jit_mode = true,
+                _ => return Err(format!("Unknown option `{}`", opt)),
             }
         }
 
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/types.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/types.rs
index 017d7784dc0..25b922c8be4 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/types.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/types.rs
@@ -6,7 +6,7 @@ use rustc_data_structures::fx::FxHashMap;
 use rustc_middle::ty::layout::LayoutOf;
 use rustc_middle::ty::{self, Ty, TyCtxt};
 
-use crate::{DebugContext, FullyMonomorphizedLayoutCx, has_ptr_meta};
+use crate::{DebugContext, FullyMonomorphizedLayoutCx};
 
 #[derive(Default)]
 pub(crate) struct TypeDebugContext<'tcx> {
@@ -129,7 +129,7 @@ impl DebugContext {
 
         let name = type_names::compute_debuginfo_type_name(tcx, ptr_type, true);
 
-        if !has_ptr_meta(tcx, ptr_type) {
+        if !tcx.type_has_metadata(ptr_type, ty::TypingEnv::fully_monomorphized()) {
             let pointer_type_id =
                 self.dwarf.unit.add(self.dwarf.unit.root(), gimli::DW_TAG_pointer_type);
             let pointer_entry = self.dwarf.unit.get_mut(pointer_type_id);
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index fb7864ae612..444dc441286 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -331,7 +331,7 @@ fn produce_final_output_artifacts(
 }
 
 fn make_module(sess: &Session, name: String) -> UnwindModule<ObjectModule> {
-    let isa = crate::build_isa(sess);
+    let isa = crate::build_isa(sess, false);
 
     let mut builder =
         ObjectBuilder::new(isa, name + ".o", cranelift_module::default_libcall_names()).unwrap();
diff --git a/compiler/rustc_codegen_cranelift/src/driver/jit.rs b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
index 57c88f4b0f9..41f8bb9161c 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/jit.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
@@ -1,75 +1,27 @@
 //! The JIT driver uses [`cranelift_jit`] to JIT execute programs without writing any object
 //! files.
 
-use std::cell::RefCell;
 use std::ffi::CString;
 use std::os::raw::{c_char, c_int};
-use std::sync::{Mutex, OnceLock, mpsc};
 
-use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
 use cranelift_jit::{JITBuilder, JITModule};
 use rustc_codegen_ssa::CrateInfo;
 use rustc_middle::mir::mono::MonoItem;
 use rustc_session::Session;
 use rustc_span::sym;
 
+use crate::CodegenCx;
 use crate::debuginfo::TypeDebugContext;
 use crate::prelude::*;
 use crate::unwind_module::UnwindModule;
-use crate::{CodegenCx, CodegenMode};
 
-struct JitState {
-    jit_module: UnwindModule<JITModule>,
-}
-
-thread_local! {
-    static LAZY_JIT_STATE: RefCell<Option<JitState>> = const { RefCell::new(None) };
-}
-
-/// The Sender owned by the rustc thread
-static GLOBAL_MESSAGE_SENDER: OnceLock<Mutex<mpsc::Sender<UnsafeMessage>>> = OnceLock::new();
-
-/// A message that is sent from the jitted runtime to the rustc thread.
-/// Senders are responsible for upholding `Send` semantics.
-enum UnsafeMessage {
-    /// Request that the specified `Instance` be lazily jitted.
-    ///
-    /// Nothing accessible through `instance_ptr` may be moved or mutated by the sender after
-    /// this message is sent.
-    JitFn {
-        instance_ptr: *const Instance<'static>,
-        trampoline_ptr: *const u8,
-        tx: mpsc::Sender<*const u8>,
-    },
-}
-unsafe impl Send for UnsafeMessage {}
-
-impl UnsafeMessage {
-    /// Send the message.
-    fn send(self) {
-        thread_local! {
-            /// The Sender owned by the local thread
-            static LOCAL_MESSAGE_SENDER: mpsc::Sender<UnsafeMessage> =
-                GLOBAL_MESSAGE_SENDER
-                    .get().unwrap()
-                    .lock().unwrap()
-                    .clone();
-        }
-        LOCAL_MESSAGE_SENDER.with(|sender| {
-            sender.send(self).expect("rustc thread hung up before lazy JIT request was sent")
-        })
-    }
-}
-
-fn create_jit_module(tcx: TyCtxt<'_>, hotswap: bool) -> (UnwindModule<JITModule>, CodegenCx) {
+fn create_jit_module(tcx: TyCtxt<'_>) -> (UnwindModule<JITModule>, CodegenCx) {
     let crate_info = CrateInfo::new(tcx, "dummy_target_cpu".to_string());
 
-    let isa = crate::build_isa(tcx.sess);
+    let isa = crate::build_isa(tcx.sess, true);
     let mut jit_builder = JITBuilder::with_isa(isa, cranelift_module::default_libcall_names());
-    jit_builder.hotswap(hotswap);
     crate::compiler_builtins::register_functions_for_jit(&mut jit_builder);
     jit_builder.symbol_lookup_fn(dep_symbol_lookup_fn(tcx.sess, crate_info));
-    jit_builder.symbol("__clif_jit_fn", clif_jit_fn as *const u8);
     let mut jit_module = UnwindModule::new(JITModule::new(jit_builder), false);
 
     let cx = crate::CodegenCx::new(tcx, jit_module.isa(), false, sym::dummy_cgu_name);
@@ -79,7 +31,7 @@ fn create_jit_module(tcx: TyCtxt<'_>, hotswap: bool) -> (UnwindModule<JITModule>
     (jit_module, cx)
 }
 
-pub(crate) fn run_jit(tcx: TyCtxt<'_>, codegen_mode: CodegenMode, jit_args: Vec<String>) -> ! {
+pub(crate) fn run_jit(tcx: TyCtxt<'_>, jit_args: Vec<String>) -> ! {
     if !tcx.sess.opts.output_types.should_codegen() {
         tcx.dcx().fatal("JIT mode doesn't work with `cargo check`");
     }
@@ -88,8 +40,7 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, codegen_mode: CodegenMode, jit_args: Vec<
         tcx.dcx().fatal("can't jit non-executable crate");
     }
 
-    let (mut jit_module, mut cx) =
-        create_jit_module(tcx, matches!(codegen_mode, CodegenMode::JitLazy));
+    let (mut jit_module, mut cx) = create_jit_module(tcx);
     let mut cached_context = Context::new();
 
     let cgus = tcx.collect_and_partition_mono_items(()).codegen_units;
@@ -105,21 +56,15 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, codegen_mode: CodegenMode, jit_args: Vec<
         super::predefine_mono_items(tcx, &mut jit_module, &mono_items);
         for (mono_item, _) in mono_items {
             match mono_item {
-                MonoItem::Fn(inst) => match codegen_mode {
-                    CodegenMode::Aot => unreachable!(),
-                    CodegenMode::Jit => {
-                        codegen_and_compile_fn(
-                            tcx,
-                            &mut cx,
-                            &mut cached_context,
-                            &mut jit_module,
-                            inst,
-                        );
-                    }
-                    CodegenMode::JitLazy => {
-                        codegen_shim(tcx, &mut cached_context, &mut jit_module, inst)
-                    }
-                },
+                MonoItem::Fn(inst) => {
+                    codegen_and_compile_fn(
+                        tcx,
+                        &mut cx,
+                        &mut cached_context,
+                        &mut jit_module,
+                        inst,
+                    );
+                }
                 MonoItem::Static(def_id) => {
                     crate::constant::codegen_static(tcx, &mut jit_module, def_id);
                 }
@@ -161,41 +106,17 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, codegen_mode: CodegenMode, jit_args: Vec<
     let start_func_id = jit_module.declare_function("main", Linkage::Import, &start_sig).unwrap();
     let finalized_start: *const u8 = jit_module.module.get_finalized_function(start_func_id);
 
-    LAZY_JIT_STATE.with(|lazy_jit_state| {
-        let mut lazy_jit_state = lazy_jit_state.borrow_mut();
-        assert!(lazy_jit_state.is_none());
-        *lazy_jit_state = Some(JitState { jit_module });
-    });
-
     let f: extern "C" fn(c_int, *const *const c_char) -> c_int =
         unsafe { ::std::mem::transmute(finalized_start) };
 
-    let (tx, rx) = mpsc::channel();
-    GLOBAL_MESSAGE_SENDER.set(Mutex::new(tx)).unwrap();
-
-    // Spawn the jitted runtime in a new thread so that this rustc thread can handle messages
-    // (eg to lazily JIT further functions as required)
-    std::thread::spawn(move || {
-        let mut argv = args.iter().map(|arg| arg.as_ptr()).collect::<Vec<_>>();
+    let mut argv = args.iter().map(|arg| arg.as_ptr()).collect::<Vec<_>>();
 
-        // Push a null pointer as a terminating argument. This is required by POSIX and
-        // useful as some dynamic linkers use it as a marker to jump over.
-        argv.push(std::ptr::null());
+    // Push a null pointer as a terminating argument. This is required by POSIX and
+    // useful as some dynamic linkers use it as a marker to jump over.
+    argv.push(std::ptr::null());
 
-        let ret = f(args.len() as c_int, argv.as_ptr());
-        std::process::exit(ret);
-    });
-
-    // Handle messages
-    loop {
-        match rx.recv().unwrap() {
-            // lazy JIT compilation request - compile requested instance and return pointer to result
-            UnsafeMessage::JitFn { instance_ptr, trampoline_ptr, tx } => {
-                tx.send(jit_fn(instance_ptr, trampoline_ptr))
-                    .expect("jitted runtime hung up before response to lazy JIT request was sent");
-            }
-        }
-    }
+    let ret = f(args.len() as c_int, argv.as_ptr());
+    std::process::exit(ret);
 }
 
 pub(crate) fn codegen_and_compile_fn<'tcx>(
@@ -227,58 +148,6 @@ pub(crate) fn codegen_and_compile_fn<'tcx>(
     });
 }
 
-extern "C" fn clif_jit_fn(
-    instance_ptr: *const Instance<'static>,
-    trampoline_ptr: *const u8,
-) -> *const u8 {
-    // send the JIT request to the rustc thread, with a channel for the response
-    let (tx, rx) = mpsc::channel();
-    UnsafeMessage::JitFn { instance_ptr, trampoline_ptr, tx }.send();
-
-    // block on JIT compilation result
-    rx.recv().expect("rustc thread hung up before responding to sent lazy JIT request")
-}
-
-fn jit_fn(instance_ptr: *const Instance<'static>, trampoline_ptr: *const u8) -> *const u8 {
-    rustc_middle::ty::tls::with(|tcx| {
-        // lift is used to ensure the correct lifetime for instance.
-        let instance = tcx.lift(unsafe { *instance_ptr }).unwrap();
-
-        LAZY_JIT_STATE.with(|lazy_jit_state| {
-            let mut lazy_jit_state = lazy_jit_state.borrow_mut();
-            let lazy_jit_state = lazy_jit_state.as_mut().unwrap();
-            let jit_module = &mut lazy_jit_state.jit_module;
-
-            let name = tcx.symbol_name(instance).name;
-            let sig = crate::abi::get_function_sig(
-                tcx,
-                jit_module.target_config().default_call_conv,
-                instance,
-            );
-            let func_id = jit_module.declare_function(name, Linkage::Export, &sig).unwrap();
-
-            let current_ptr = jit_module.module.read_got_entry(func_id);
-
-            // If the function's GOT entry has already been updated to point at something other
-            // than the shim trampoline, don't re-jit but just return the new pointer instead.
-            // This does not need synchronization as this code is executed only by a sole rustc
-            // thread.
-            if current_ptr != trampoline_ptr {
-                return current_ptr;
-            }
-
-            jit_module.module.prepare_for_function_redefine(func_id).unwrap();
-
-            let mut cx = crate::CodegenCx::new(tcx, jit_module.isa(), false, sym::dummy_cgu_name);
-            codegen_and_compile_fn(tcx, &mut cx, &mut Context::new(), jit_module, instance);
-
-            assert!(cx.global_asm.is_empty());
-            jit_module.finalize_definitions();
-            jit_module.module.get_finalized_function(func_id)
-        })
-    })
-}
-
 fn dep_symbol_lookup_fn(
     sess: &Session,
     crate_info: CrateInfo,
@@ -326,57 +195,3 @@ fn dep_symbol_lookup_fn(
         None
     })
 }
-
-fn codegen_shim<'tcx>(
-    tcx: TyCtxt<'tcx>,
-    cached_context: &mut Context,
-    module: &mut UnwindModule<JITModule>,
-    inst: Instance<'tcx>,
-) {
-    let pointer_type = module.target_config().pointer_type();
-
-    let name = tcx.symbol_name(inst).name;
-    let sig = crate::abi::get_function_sig(tcx, module.target_config().default_call_conv, inst);
-    let func_id = module.declare_function(name, Linkage::Export, &sig).unwrap();
-
-    let instance_ptr = Box::into_raw(Box::new(inst));
-
-    let jit_fn = module
-        .declare_function(
-            "__clif_jit_fn",
-            Linkage::Import,
-            &Signature {
-                call_conv: module.target_config().default_call_conv,
-                params: vec![AbiParam::new(pointer_type), AbiParam::new(pointer_type)],
-                returns: vec![AbiParam::new(pointer_type)],
-            },
-        )
-        .unwrap();
-
-    let context = cached_context;
-    context.clear();
-    let trampoline = &mut context.func;
-    trampoline.signature = sig.clone();
-
-    let mut builder_ctx = FunctionBuilderContext::new();
-    let mut trampoline_builder = FunctionBuilder::new(trampoline, &mut builder_ctx);
-
-    let trampoline_fn = module.declare_func_in_func(func_id, trampoline_builder.func);
-    let jit_fn = module.declare_func_in_func(jit_fn, trampoline_builder.func);
-    let sig_ref = trampoline_builder.func.import_signature(sig);
-
-    let entry_block = trampoline_builder.create_block();
-    trampoline_builder.append_block_params_for_function_params(entry_block);
-    let fn_args = trampoline_builder.func.dfg.block_params(entry_block).to_vec();
-
-    trampoline_builder.switch_to_block(entry_block);
-    let instance_ptr = trampoline_builder.ins().iconst(pointer_type, instance_ptr as u64 as i64);
-    let trampoline_ptr = trampoline_builder.ins().func_addr(pointer_type, trampoline_fn);
-    let jitted_fn = trampoline_builder.ins().call(jit_fn, &[instance_ptr, trampoline_ptr]);
-    let jitted_fn = trampoline_builder.func.dfg.inst_results(jitted_fn)[0];
-    let call_inst = trampoline_builder.ins().call_indirect(sig_ref, jitted_fn, &fn_args);
-    let ret_vals = trampoline_builder.func.dfg.inst_results(call_inst).to_vec();
-    trampoline_builder.ins().return_(&ret_vals);
-
-    module.define_function(func_id, context).unwrap();
-}
diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
index 310b226814d..fbc33a64285 100644
--- a/compiler/rustc_codegen_cranelift/src/inline_asm.rs
+++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
@@ -612,6 +612,15 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
             generated_asm.push_str(".att_syntax\n");
         }
 
+        if self.arch == InlineAsmArch::AArch64 {
+            for feature in &self.tcx.codegen_fn_attrs(self.enclosing_def_id).target_features {
+                if feature.name == sym::neon {
+                    continue;
+                }
+                writeln!(generated_asm, ".arch_extension {}", feature.name).unwrap();
+            }
+        }
+
         // The actual inline asm
         for piece in self.template {
             match piece {
@@ -652,6 +661,20 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
                                         .emit(&mut generated_asm, InlineAsmArch::X86_64, *modifier)
                                         .unwrap(),
                                 },
+                                InlineAsmArch::AArch64 => match reg {
+                                    InlineAsmReg::AArch64(reg) if reg.vreg_index().is_some() => {
+                                        // rustc emits v0 rather than q0
+                                        reg.emit(
+                                            &mut generated_asm,
+                                            InlineAsmArch::AArch64,
+                                            Some(modifier.unwrap_or('q')),
+                                        )
+                                        .unwrap()
+                                    }
+                                    _ => reg
+                                        .emit(&mut generated_asm, InlineAsmArch::AArch64, *modifier)
+                                        .unwrap(),
+                                },
                                 _ => reg.emit(&mut generated_asm, self.arch, *modifier).unwrap(),
                             }
                         }
@@ -665,6 +688,15 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
         }
         generated_asm.push('\n');
 
+        if self.arch == InlineAsmArch::AArch64 {
+            for feature in &self.tcx.codegen_fn_attrs(self.enclosing_def_id).target_features {
+                if feature.name == sym::neon {
+                    continue;
+                }
+                writeln!(generated_asm, ".arch_extension no{}", feature.name).unwrap();
+            }
+        }
+
         if is_x86 && self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
             generated_asm.push_str(".intel_syntax noprefix\n");
         }
@@ -809,7 +841,13 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
             }
             InlineAsmArch::AArch64 => {
                 generated_asm.push_str("    str ");
-                reg.emit(generated_asm, InlineAsmArch::AArch64, None).unwrap();
+                match reg {
+                    InlineAsmReg::AArch64(reg) if reg.vreg_index().is_some() => {
+                        // rustc emits v0 rather than q0
+                        reg.emit(generated_asm, InlineAsmArch::AArch64, Some('q')).unwrap()
+                    }
+                    _ => reg.emit(generated_asm, InlineAsmArch::AArch64, None).unwrap(),
+                }
                 writeln!(generated_asm, ", [x19, 0x{:x}]", offset.bytes()).unwrap();
             }
             InlineAsmArch::RiscV64 => {
@@ -851,7 +889,13 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
             }
             InlineAsmArch::AArch64 => {
                 generated_asm.push_str("    ldr ");
-                reg.emit(generated_asm, InlineAsmArch::AArch64, None).unwrap();
+                match reg {
+                    InlineAsmReg::AArch64(reg) if reg.vreg_index().is_some() => {
+                        // rustc emits v0 rather than q0
+                        reg.emit(generated_asm, InlineAsmArch::AArch64, Some('q')).unwrap()
+                    }
+                    _ => reg.emit(generated_asm, InlineAsmArch::AArch64, None).unwrap(),
+                }
                 writeln!(generated_asm, ", [x19, 0x{:x}]", offset.bytes()).unwrap();
             }
             InlineAsmArch::RiscV64 => {
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs
index 720a0d8fbf5..eb0dfbb69c3 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs
@@ -54,6 +54,14 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
             );
         }
 
+        "llvm.fptosi.sat.v4i32.v4f32" => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().fcvt_to_sint_sat(types::I32, lane)
+            });
+        }
+
         _ => {
             fx.tcx
                 .dcx()
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
index 4c59c81296b..387c87d123a 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
@@ -1,5 +1,9 @@
 //! Emulate AArch64 LLVM intrinsics
 
+use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
+use rustc_target::asm::*;
+
+use crate::inline_asm::{CInlineAsmOperand, codegen_inline_asm_inner};
 use crate::intrinsics::*;
 use crate::prelude::*;
 
@@ -17,7 +21,7 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             fx.bcx.ins().fence();
         }
 
-        "llvm.aarch64.neon.ld1x4.v16i8.p0i8" => {
+        "llvm.aarch64.neon.ld1x4.v16i8.p0" => {
             intrinsic_args!(fx, args => (ptr); intrinsic);
 
             let ptr = ptr.load_scalar(fx);
@@ -49,6 +53,121 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             });
         }
 
+        "llvm.aarch64.neon.fcvtns.v4i32.v4f32" => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            // Note: Using inline asm instead of fcvt_to_sint as the latter rounds to zero rather than to nearest
+
+            let a_ptr = a.force_stack(fx).0.get_addr(fx);
+            let res_place = CPlace::new_stack_slot(fx, ret.layout());
+            let res_ptr = res_place.to_ptr().get_addr(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(
+                    "ldr     q0, [x0]
+                     fcvtns  v0.4s, v0.4s
+                     str     q0, [x1]"
+                        .into(),
+                )],
+                &[
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
+                            AArch64InlineAsmReg::x0,
+                        )),
+                        value: a_ptr,
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
+                            AArch64InlineAsmReg::x1,
+                        )),
+                        value: res_ptr,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK,
+            );
+            let res = res_place.to_cvalue(fx);
+            ret.write_cvalue_transmute(fx, res);
+        }
+
+        "llvm.aarch64.neon.frecpe.v4f32" => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            let a_ptr = a.force_stack(fx).0.get_addr(fx);
+            let res_place = CPlace::new_stack_slot(fx, ret.layout());
+            let res_ptr = res_place.to_ptr().get_addr(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(
+                    "ldr     q0, [x0]
+                     frecpe  v0.4s, v0.4s
+                     str     q0, [x1]"
+                        .into(),
+                )],
+                &[
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
+                            AArch64InlineAsmReg::x0,
+                        )),
+                        value: a_ptr,
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
+                            AArch64InlineAsmReg::x1,
+                        )),
+                        value: res_ptr,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK,
+            );
+            let res = res_place.to_cvalue(fx);
+            ret.write_cvalue_transmute(fx, res);
+        }
+
+        "llvm.aarch64.neon.frecps.v4f32" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a_ptr = a.force_stack(fx).0.get_addr(fx);
+            let b_ptr = b.force_stack(fx).0.get_addr(fx);
+            let res_place = CPlace::new_stack_slot(fx, ret.layout());
+            let res_ptr = res_place.to_ptr().get_addr(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(
+                    "ldr     q0, [x0]
+                     ldr     q1, [x1]
+                     frecps  v0.4s, v0.4s, v1.4s
+                     str     q0, [x2]"
+                        .into(),
+                )],
+                &[
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
+                            AArch64InlineAsmReg::x0,
+                        )),
+                        value: a_ptr,
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
+                            AArch64InlineAsmReg::x1,
+                        )),
+                        value: b_ptr,
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64(
+                            AArch64InlineAsmReg::x2,
+                        )),
+                        value: res_ptr,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK,
+            );
+            let res = res_place.to_cvalue(fx);
+            ret.write_cvalue_transmute(fx, res);
+        }
+
         _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v")
             || intrinsic.starts_with("llvm.aarch64.neon.uqadd.v") =>
         {
@@ -134,7 +253,7 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
             }
             let res = CValue::by_val(
                 fx.bcx.ins().uextend(types::I32, res_val),
-                fx.layout_of(fx.tcx.types.u32),
+                fx.layout_of(fx.tcx.types.i32),
             );
             ret.write_cvalue(fx, res);
         }
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
index ab3386a9b4c..e7afaff3b42 100644
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -42,7 +42,6 @@ use std::sync::Arc;
 use cranelift_codegen::isa::TargetIsa;
 use cranelift_codegen::settings::{self, Configurable};
 use rustc_codegen_ssa::CodegenResults;
-use rustc_codegen_ssa::back::versioned_llvm_target;
 use rustc_codegen_ssa::traits::CodegenBackend;
 use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
@@ -214,15 +213,14 @@ impl CodegenBackend for CraneliftCodegenBackend {
             BackendConfig::from_opts(&tcx.sess.opts.cg.llvm_args)
                 .unwrap_or_else(|err| tcx.sess.dcx().fatal(err))
         });
-        match config.codegen_mode {
-            CodegenMode::Aot => driver::aot::run_aot(tcx, metadata, need_metadata_module),
-            CodegenMode::Jit | CodegenMode::JitLazy => {
-                #[cfg(feature = "jit")]
-                driver::jit::run_jit(tcx, config.codegen_mode, config.jit_args);
-
-                #[cfg(not(feature = "jit"))]
-                tcx.dcx().fatal("jit support was disabled when compiling rustc_codegen_cranelift");
-            }
+        if config.jit_mode {
+            #[cfg(feature = "jit")]
+            driver::jit::run_jit(tcx, config.jit_args);
+
+            #[cfg(not(feature = "jit"))]
+            tcx.dcx().fatal("jit support was disabled when compiling rustc_codegen_cranelift");
+        } else {
+            driver::aot::run_aot(tcx, metadata, need_metadata_module)
         }
     }
 
@@ -247,21 +245,19 @@ fn enable_verifier(sess: &Session) -> bool {
 }
 
 fn target_triple(sess: &Session) -> target_lexicon::Triple {
-    // FIXME(madsmtm): Use `sess.target.llvm_target` once target-lexicon supports unversioned macOS.
-    // See <https://github.com/bytecodealliance/target-lexicon/pull/113>
-    match versioned_llvm_target(sess).parse() {
+    match sess.target.llvm_target.parse() {
         Ok(triple) => triple,
         Err(err) => sess.dcx().fatal(format!("target not recognized: {}", err)),
     }
 }
 
-fn build_isa(sess: &Session) -> Arc<dyn TargetIsa + 'static> {
+fn build_isa(sess: &Session, jit: bool) -> Arc<dyn TargetIsa + 'static> {
     use target_lexicon::BinaryFormat;
 
     let target_triple = crate::target_triple(sess);
 
     let mut flags_builder = settings::builder();
-    flags_builder.enable("is_pic").unwrap();
+    flags_builder.set("is_pic", if jit { "false" } else { "true" }).unwrap();
     let enable_verifier = if enable_verifier(sess) { "true" } else { "false" };
     flags_builder.set("enable_verifier", enable_verifier).unwrap();
     flags_builder.set("regalloc_checker", enable_verifier).unwrap();
diff --git a/compiler/rustc_codegen_cranelift/src/num.rs b/compiler/rustc_codegen_cranelift/src/num.rs
index f44e2459a78..2a4d1e3ae57 100644
--- a/compiler/rustc_codegen_cranelift/src/num.rs
+++ b/compiler/rustc_codegen_cranelift/src/num.rs
@@ -395,8 +395,12 @@ pub(crate) fn codegen_ptr_binop<'tcx>(
     in_lhs: CValue<'tcx>,
     in_rhs: CValue<'tcx>,
 ) -> CValue<'tcx> {
-    let is_thin_ptr =
-        in_lhs.layout().ty.builtin_deref(true).map(|ty| !has_ptr_meta(fx.tcx, ty)).unwrap_or(true);
+    let is_thin_ptr = in_lhs
+        .layout()
+        .ty
+        .builtin_deref(true)
+        .map(|ty| !fx.tcx.type_has_metadata(ty, ty::TypingEnv::fully_monomorphized()))
+        .unwrap_or(true);
 
     if is_thin_ptr {
         match bin_op {
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index cc739fefcd0..f8a19589fdd 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -746,7 +746,7 @@ impl<'tcx> CPlace<'tcx> {
         };
 
         let (field_ptr, field_layout) = codegen_field(fx, base, extra, layout, field);
-        if has_ptr_meta(fx.tcx, field_layout.ty) {
+        if fx.tcx.type_has_metadata(field_layout.ty, ty::TypingEnv::fully_monomorphized()) {
             CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout)
         } else {
             CPlace::for_ptr(field_ptr, field_layout)
@@ -832,7 +832,7 @@ impl<'tcx> CPlace<'tcx> {
 
     pub(crate) fn place_deref(self, fx: &mut FunctionCx<'_, '_, 'tcx>) -> CPlace<'tcx> {
         let inner_layout = fx.layout_of(self.layout().ty.builtin_deref(true).unwrap());
-        if has_ptr_meta(fx.tcx, inner_layout.ty) {
+        if fx.tcx.type_has_metadata(inner_layout.ty, ty::TypingEnv::fully_monomorphized()) {
             let (addr, extra) = self.to_cvalue(fx).load_scalar_pair(fx);
             CPlace::for_ptr_with_extra(Pointer::new(addr), extra, inner_layout)
         } else {
@@ -845,7 +845,7 @@ impl<'tcx> CPlace<'tcx> {
         fx: &mut FunctionCx<'_, '_, 'tcx>,
         layout: TyAndLayout<'tcx>,
     ) -> CValue<'tcx> {
-        if has_ptr_meta(fx.tcx, self.layout().ty) {
+        if fx.tcx.type_has_metadata(self.layout().ty, ty::TypingEnv::fully_monomorphized()) {
             let (ptr, extra) = self.to_ptr_unsized();
             CValue::by_val_pair(ptr.get_addr(fx), extra, layout)
         } else {