about summary refs log tree commit diff
path: root/compiler/rustc_codegen_cranelift/src
diff options
context:
space:
mode:
authorbjorn3 <17426603+bjorn3@users.noreply.github.com>2024-05-13 13:26:33 +0000
committerbjorn3 <17426603+bjorn3@users.noreply.github.com>2024-05-13 13:26:33 +0000
commit75f8bdbca4965896c3d3ead656f6a13e8409a78b (patch)
treed8fba5cd3ad9aee184393d91c0c1b01fc5e76faa /compiler/rustc_codegen_cranelift/src
parentabb95639ef2b837dbfe7b5d18f51fadda29711cb (diff)
parent3270432f4b0583104c8b9b6f695bf97d6bbf3ac2 (diff)
downloadrust-75f8bdbca4965896c3d3ead656f6a13e8409a78b.tar.gz
rust-75f8bdbca4965896c3d3ead656f6a13e8409a78b.zip
Merge commit '3270432f4b0583104c8b9b6f695bf97d6bbf3ac2' into sync_cg_clif-2024-05-13
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
-rw-r--r--compiler/rustc_codegen_cranelift/src/abi/mod.rs6
-rw-r--r--compiler/rustc_codegen_cranelift/src/base.rs37
-rw-r--r--compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs21
-rw-r--r--compiler/rustc_codegen_cranelift/src/config.rs9
-rw-r--r--compiler/rustc_codegen_cranelift/src/constant.rs2
-rw-r--r--compiler/rustc_codegen_cranelift/src/discriminant.rs45
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/aot.rs72
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/jit.rs23
-rw-r--r--compiler/rustc_codegen_cranelift/src/driver/mod.rs16
-rw-r--r--compiler/rustc_codegen_cranelift/src/global_asm.rs2
-rw-r--r--compiler/rustc_codegen_cranelift/src/inline_asm.rs201
-rw-r--r--compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs52
-rw-r--r--compiler/rustc_codegen_cranelift/src/lib.rs6
-rw-r--r--compiler/rustc_codegen_cranelift/src/main_shim.rs2
-rw-r--r--compiler/rustc_codegen_cranelift/src/value_and_place.rs8
15 files changed, 362 insertions, 140 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
index 6f346af25c6..4bcef15ad04 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -412,7 +412,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
                     Err(instance) => Some(instance),
                 }
             }
-            InstanceDef::DropGlue(_, None) => {
+            InstanceDef::DropGlue(_, None) | ty::InstanceDef::AsyncDropGlueCtorShim(_, None) => {
                 // empty drop glue - a nop.
                 let dest = target.expect("Non terminating drop_in_place_real???");
                 let ret_block = fx.get_block(dest);
@@ -597,7 +597,9 @@ pub(crate) fn codegen_drop<'tcx>(
     let ty = drop_place.layout().ty;
     let drop_instance = Instance::resolve_drop_in_place(fx.tcx, ty).polymorphize(fx.tcx);
 
-    if let ty::InstanceDef::DropGlue(_, None) = drop_instance.def {
+    if let ty::InstanceDef::DropGlue(_, None) | ty::InstanceDef::AsyncDropGlueCtorShim(_, None) =
+        drop_instance.def
+    {
         // we don't actually need to drop anything
     } else {
         match ty.kind() {
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index 8874efadec9..5846689643f 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -6,6 +6,7 @@ use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
 use cranelift_module::ModuleError;
 use rustc_ast::InlineAsmOptions;
 use rustc_index::IndexVec;
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::ty::adjustment::PointerCoercion;
 use rustc_middle::ty::layout::FnAbiOf;
 use rustc_middle::ty::print::with_no_trimmed_paths;
@@ -14,6 +15,7 @@ use rustc_monomorphize::is_call_from_compiler_builtins_to_upstream_monomorphizat
 
 use crate::constant::ConstantCx;
 use crate::debuginfo::{FunctionDebugContext, TypeDebugContext};
+use crate::inline_asm::codegen_naked_asm;
 use crate::prelude::*;
 use crate::pretty_clif::CommentWriter;
 
@@ -32,7 +34,7 @@ pub(crate) fn codegen_fn<'tcx>(
     cached_func: Function,
     module: &mut dyn Module,
     instance: Instance<'tcx>,
-) -> CodegenedFunction {
+) -> Option<CodegenedFunction> {
     debug_assert!(!instance.args.has_infer());
 
     let symbol_name = tcx.symbol_name(instance).name.to_string();
@@ -48,6 +50,37 @@ pub(crate) fn codegen_fn<'tcx>(
         String::from_utf8_lossy(&buf).into_owned()
     });
 
+    if tcx.codegen_fn_attrs(instance.def_id()).flags.contains(CodegenFnAttrFlags::NAKED) {
+        assert_eq!(mir.basic_blocks.len(), 1);
+        assert!(mir.basic_blocks[START_BLOCK].statements.is_empty());
+
+        match &mir.basic_blocks[START_BLOCK].terminator().kind {
+            TerminatorKind::InlineAsm {
+                template,
+                operands,
+                options,
+                line_spans: _,
+                targets: _,
+                unwind: _,
+            } => {
+                codegen_naked_asm(
+                    tcx,
+                    cx,
+                    module,
+                    instance,
+                    mir.basic_blocks[START_BLOCK].terminator().source_info.span,
+                    &symbol_name,
+                    template,
+                    operands,
+                    *options,
+                );
+            }
+            _ => unreachable!(),
+        }
+
+        return None;
+    }
+
     // Declare function
     let sig = get_function_sig(tcx, module.target_config().default_call_conv, instance);
     let func_id = module.declare_function(&symbol_name, Linkage::Local, &sig).unwrap();
@@ -128,7 +161,7 @@ pub(crate) fn codegen_fn<'tcx>(
     // Verify function
     verify_func(tcx, &clif_comments, &func);
 
-    CodegenedFunction { symbol_name, func_id, func, clif_comments, func_debug_cx }
+    Some(CodegenedFunction { symbol_name, func_id, func, clif_comments, func_debug_cx })
 }
 
 pub(crate) fn compile_fn(
diff --git a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
index 9678969134a..a73860cf18b 100644
--- a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
+++ b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
@@ -6,7 +6,7 @@ use rustc_session::Session;
 // FIXME don't panic when a worker thread panics
 
 pub(super) struct ConcurrencyLimiter {
-    helper_thread: Option<HelperThread>,
+    helper_thread: Option<Mutex<HelperThread>>,
     state: Arc<Mutex<state::ConcurrencyLimiterState>>,
     available_token_condvar: Arc<Condvar>,
     finished: bool,
@@ -39,14 +39,14 @@ impl ConcurrencyLimiter {
             })
             .unwrap();
         ConcurrencyLimiter {
-            helper_thread: Some(helper_thread),
+            helper_thread: Some(Mutex::new(helper_thread)),
             state,
             available_token_condvar,
             finished: false,
         }
     }
 
-    pub(super) fn acquire(&mut self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken {
+    pub(super) fn acquire(&self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken {
         let mut state = self.state.lock().unwrap();
         loop {
             state.assert_invariants();
@@ -73,16 +73,11 @@ impl ConcurrencyLimiter {
                 }
             }
 
-            self.helper_thread.as_mut().unwrap().request_token();
+            self.helper_thread.as_ref().unwrap().lock().unwrap().request_token();
             state = self.available_token_condvar.wait(state).unwrap();
         }
     }
 
-    pub(super) fn job_already_done(&mut self) {
-        let mut state = self.state.lock().unwrap();
-        state.job_already_done();
-    }
-
     pub(crate) fn finished(mut self) {
         self.helper_thread.take();
 
@@ -190,14 +185,6 @@ mod state {
             self.assert_invariants();
         }
 
-        pub(super) fn job_already_done(&mut self) {
-            self.assert_invariants();
-            self.pending_jobs -= 1;
-            self.assert_invariants();
-            self.drop_excess_capacity();
-            self.assert_invariants();
-        }
-
         pub(super) fn poison(&mut self, error: String) {
             self.poisoned = true;
             self.stored_error = Some(error);
diff --git a/compiler/rustc_codegen_cranelift/src/config.rs b/compiler/rustc_codegen_cranelift/src/config.rs
index 9e92d656c76..12bce680d9e 100644
--- a/compiler/rustc_codegen_cranelift/src/config.rs
+++ b/compiler/rustc_codegen_cranelift/src/config.rs
@@ -64,8 +64,13 @@ impl Default for BackendConfig {
         BackendConfig {
             codegen_mode: CodegenMode::Aot,
             jit_args: {
-                let args = std::env::var("CG_CLIF_JIT_ARGS").unwrap_or_else(|_| String::new());
-                args.split(' ').map(|arg| arg.to_string()).collect()
+                match std::env::var("CG_CLIF_JIT_ARGS") {
+                    Ok(args) => args.split(' ').map(|arg| arg.to_string()).collect(),
+                    Err(std::env::VarError::NotPresent) => vec![],
+                    Err(std::env::VarError::NotUnicode(s)) => {
+                        panic!("CG_CLIF_JIT_ARGS not unicode: {:?}", s);
+                    }
+                }
             },
             enable_verifier: cfg!(debug_assertions) || bool_env_var("CG_CLIF_ENABLE_VERIFIER"),
             disable_incr_cache: bool_env_var("CG_CLIF_DISABLE_INCR_CACHE"),
diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs
index cdf499a22f8..64e83e43d32 100644
--- a/compiler/rustc_codegen_cranelift/src/constant.rs
+++ b/compiler/rustc_codegen_cranelift/src/constant.rs
@@ -258,7 +258,7 @@ fn data_id_for_static(
 ) -> DataId {
     let attrs = tcx.codegen_fn_attrs(def_id);
 
-    let instance = Instance::mono(tcx, def_id).polymorphize(tcx);
+    let instance = Instance::mono(tcx, def_id);
     let symbol_name = tcx.symbol_name(instance).name;
 
     if let Some(import_linkage) = attrs.import_linkage {
diff --git a/compiler/rustc_codegen_cranelift/src/discriminant.rs b/compiler/rustc_codegen_cranelift/src/discriminant.rs
index 670384663e8..e7ac084558a 100644
--- a/compiler/rustc_codegen_cranelift/src/discriminant.rs
+++ b/compiler/rustc_codegen_cranelift/src/discriminant.rs
@@ -28,16 +28,20 @@ pub(crate) fn codegen_set_discriminant<'tcx>(
         } => {
             let ptr = place.place_field(fx, FieldIdx::new(tag_field));
             let to = layout.ty.discriminant_for_variant(fx.tcx, variant_index).unwrap().val;
-            let to = if ptr.layout().abi.is_signed() {
-                ty::ScalarInt::try_from_int(
-                    ptr.layout().size.sign_extend(to) as i128,
-                    ptr.layout().size,
-                )
-                .unwrap()
-            } else {
-                ty::ScalarInt::try_from_uint(to, ptr.layout().size).unwrap()
+            let to = match ptr.layout().ty.kind() {
+                ty::Uint(UintTy::U128) | ty::Int(IntTy::I128) => {
+                    let lsb = fx.bcx.ins().iconst(types::I64, to as u64 as i64);
+                    let msb = fx.bcx.ins().iconst(types::I64, (to >> 64) as u64 as i64);
+                    fx.bcx.ins().iconcat(lsb, msb)
+                }
+                ty::Uint(_) | ty::Int(_) => {
+                    let clif_ty = fx.clif_type(ptr.layout().ty).unwrap();
+                    let raw_val = ptr.layout().size.truncate(to);
+                    fx.bcx.ins().iconst(clif_ty, raw_val as i64)
+                }
+                _ => unreachable!(),
             };
-            let discr = CValue::const_val(fx, ptr.layout(), to);
+            let discr = CValue::by_val(to, ptr.layout());
             ptr.write_cvalue(fx, discr);
         }
         Variants::Multiple {
@@ -85,16 +89,21 @@ pub(crate) fn codegen_get_discriminant<'tcx>(
                 .ty
                 .discriminant_for_variant(fx.tcx, *index)
                 .map_or(u128::from(index.as_u32()), |discr| discr.val);
-            let discr_val = if dest_layout.abi.is_signed() {
-                ty::ScalarInt::try_from_int(
-                    dest_layout.size.sign_extend(discr_val) as i128,
-                    dest_layout.size,
-                )
-                .unwrap()
-            } else {
-                ty::ScalarInt::try_from_uint(discr_val, dest_layout.size).unwrap()
+
+            let val = match dest_layout.ty.kind() {
+                ty::Uint(UintTy::U128) | ty::Int(IntTy::I128) => {
+                    let lsb = fx.bcx.ins().iconst(types::I64, discr_val as u64 as i64);
+                    let msb = fx.bcx.ins().iconst(types::I64, (discr_val >> 64) as u64 as i64);
+                    fx.bcx.ins().iconcat(lsb, msb)
+                }
+                ty::Uint(_) | ty::Int(_) => {
+                    let clif_ty = fx.clif_type(dest_layout.ty).unwrap();
+                    let raw_val = dest_layout.size.truncate(discr_val);
+                    fx.bcx.ins().iconst(clif_ty, raw_val as i64)
+                }
+                _ => unreachable!(),
             };
-            let res = CValue::const_val(fx, dest_layout, discr_val);
+            let res = CValue::by_val(val, dest_layout);
             dest.write_cvalue(fx, res);
             return;
         }
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index e8c96486041..fce4690f97d 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -15,6 +15,7 @@ use rustc_codegen_ssa::errors as ssa_errors;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, ModuleKind};
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
+use rustc_data_structures::sync::{par_map, IntoDynSyncSend};
 use rustc_metadata::fs::copy_to_stdout;
 use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
@@ -481,15 +482,16 @@ fn module_codegen(
             for (mono_item, _) in mono_items {
                 match mono_item {
                     MonoItem::Fn(inst) => {
-                        let codegened_function = crate::base::codegen_fn(
+                        if let Some(codegened_function) = crate::base::codegen_fn(
                             tcx,
                             &mut cx,
                             &mut type_dbg,
                             Function::new(),
                             &mut module,
                             inst,
-                        );
-                        codegened_functions.push(codegened_function);
+                        ) {
+                            codegened_functions.push(codegened_function);
+                        }
                     }
                     MonoItem::Static(def_id) => {
                         let data_id = crate::constant::codegen_static(tcx, &mut module, def_id);
@@ -604,39 +606,39 @@ pub(crate) fn run_aot(
 
     let global_asm_config = Arc::new(crate::global_asm::GlobalAsmConfig::new(tcx));
 
-    let mut concurrency_limiter = ConcurrencyLimiter::new(tcx.sess, cgus.len());
+    let (todo_cgus, done_cgus) =
+        cgus.into_iter().enumerate().partition::<Vec<_>, _>(|&(i, _)| match cgu_reuse[i] {
+            _ if backend_config.disable_incr_cache => true,
+            CguReuse::No => true,
+            CguReuse::PreLto | CguReuse::PostLto => false,
+        });
+
+    let concurrency_limiter = IntoDynSyncSend(ConcurrencyLimiter::new(tcx.sess, todo_cgus.len()));
 
     let modules = tcx.sess.time("codegen mono items", || {
-        cgus.iter()
-            .enumerate()
-            .map(|(i, cgu)| {
-                let cgu_reuse =
-                    if backend_config.disable_incr_cache { CguReuse::No } else { cgu_reuse[i] };
-                match cgu_reuse {
-                    CguReuse::No => {
-                        let dep_node = cgu.codegen_dep_node(tcx);
-                        tcx.dep_graph
-                            .with_task(
-                                dep_node,
-                                tcx,
-                                (
-                                    backend_config.clone(),
-                                    global_asm_config.clone(),
-                                    cgu.name(),
-                                    concurrency_limiter.acquire(tcx.dcx()),
-                                ),
-                                module_codegen,
-                                Some(rustc_middle::dep_graph::hash_result),
-                            )
-                            .0
-                    }
-                    CguReuse::PreLto | CguReuse::PostLto => {
-                        concurrency_limiter.job_already_done();
-                        OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu))
-                    }
-                }
-            })
-            .collect::<Vec<_>>()
+        let mut modules: Vec<_> = par_map(todo_cgus, |(_, cgu)| {
+            let dep_node = cgu.codegen_dep_node(tcx);
+            tcx.dep_graph
+                .with_task(
+                    dep_node,
+                    tcx,
+                    (
+                        backend_config.clone(),
+                        global_asm_config.clone(),
+                        cgu.name(),
+                        concurrency_limiter.acquire(tcx.dcx()),
+                    ),
+                    module_codegen,
+                    Some(rustc_middle::dep_graph::hash_result),
+                )
+                .0
+        });
+        modules.extend(
+            done_cgus
+                .into_iter()
+                .map(|(_, cgu)| OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu))),
+        );
+        modules
     });
 
     let mut allocator_module = make_module(tcx.sess, &backend_config, "allocator_shim".to_string());
@@ -705,6 +707,6 @@ pub(crate) fn run_aot(
         metadata_module,
         metadata,
         crate_info: CrateInfo::new(tcx, target_cpu),
-        concurrency_limiter,
+        concurrency_limiter: concurrency_limiter.0,
     })
 }
diff --git a/compiler/rustc_codegen_cranelift/src/driver/jit.rs b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
index 929fa92596d..4b149131b61 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/jit.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
@@ -83,13 +83,6 @@ fn create_jit_module(
     );
 
     crate::allocator::codegen(tcx, &mut jit_module, &mut cx.unwind_context);
-    crate::main_shim::maybe_create_entry_wrapper(
-        tcx,
-        &mut jit_module,
-        &mut cx.unwind_context,
-        true,
-        true,
-    );
 
     (jit_module, cx)
 }
@@ -153,6 +146,14 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, backend_config: BackendConfig) -> ! {
         tcx.dcx().fatal("Inline asm is not supported in JIT mode");
     }
 
+    crate::main_shim::maybe_create_entry_wrapper(
+        tcx,
+        &mut jit_module,
+        &mut cx.unwind_context,
+        true,
+        true,
+    );
+
     tcx.dcx().abort_if_errors();
 
     jit_module.finalize_definitions().unwrap();
@@ -231,16 +232,16 @@ pub(crate) fn codegen_and_compile_fn<'tcx>(
             crate::PrintOnPanic(|| format!("{:?} {}", instance, tcx.symbol_name(instance).name));
 
         let cached_func = std::mem::replace(&mut cached_context.func, Function::new());
-        let codegened_func = crate::base::codegen_fn(
+        if let Some(codegened_func) = crate::base::codegen_fn(
             tcx,
             cx,
             &mut TypeDebugContext::default(),
             cached_func,
             module,
             instance,
-        );
-
-        crate::base::compile_fn(cx, cached_context, module, codegened_func);
+        ) {
+            crate::base::compile_fn(cx, cached_context, module, codegened_func);
+        }
     });
 }
 
diff --git a/compiler/rustc_codegen_cranelift/src/driver/mod.rs b/compiler/rustc_codegen_cranelift/src/driver/mod.rs
index 12e90b58410..fb0eed07c19 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/mod.rs
@@ -5,6 +5,7 @@
 //! [`codegen_static`]: crate::constant::codegen_static
 
 use rustc_data_structures::profiling::SelfProfilerRef;
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::mir::mono::{MonoItem, MonoItemData};
 
 use crate::prelude::*;
@@ -33,7 +34,20 @@ fn predefine_mono_items<'tcx>(
                         data.visibility,
                         is_compiler_builtins,
                     );
-                    module.declare_function(name, linkage, &sig).unwrap();
+                    let is_naked = tcx
+                        .codegen_fn_attrs(instance.def_id())
+                        .flags
+                        .contains(CodegenFnAttrFlags::NAKED);
+                    module
+                        .declare_function(
+                            name,
+                            // Naked functions are defined in a separate object
+                            // file from the codegen unit rustc expects them to
+                            // be defined in.
+                            if is_naked { Linkage::Import } else { linkage },
+                            &sig,
+                        )
+                        .unwrap();
                 }
                 MonoItem::Static(_) | MonoItem::GlobalAsm(_) => {}
             }
diff --git a/compiler/rustc_codegen_cranelift/src/global_asm.rs b/compiler/rustc_codegen_cranelift/src/global_asm.rs
index 5a0cd3990f2..0c99a5ce12f 100644
--- a/compiler/rustc_codegen_cranelift/src/global_asm.rs
+++ b/compiler/rustc_codegen_cranelift/src/global_asm.rs
@@ -81,7 +81,7 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String,
                                 );
                             }
 
-                            let instance = Instance::mono(tcx, def_id).polymorphize(tcx);
+                            let instance = Instance::mono(tcx, def_id);
                             let symbol = tcx.symbol_name(instance);
                             global_asm.push_str(symbol.name);
                         }
diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
index 28b92f730da..2de804f5e04 100644
--- a/compiler/rustc_codegen_cranelift/src/inline_asm.rs
+++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
@@ -127,7 +127,7 @@ pub(crate) fn codegen_inline_asm_terminator<'tcx>(
             }
             InlineAsmOperand::SymStatic { def_id } => {
                 assert!(fx.tcx.is_static(def_id));
-                let instance = Instance::mono(fx.tcx, def_id).polymorphize(fx.tcx);
+                let instance = Instance::mono(fx.tcx, def_id);
                 CInlineAsmOperand::Symbol { symbol: fx.tcx.symbol_name(instance).name.to_owned() }
             }
             InlineAsmOperand::Label { .. } => {
@@ -169,6 +169,7 @@ pub(crate) fn codegen_inline_asm_inner<'tcx>(
         stack_slots_input: Vec::new(),
         stack_slots_output: Vec::new(),
         stack_slot_size: Size::from_bytes(0),
+        is_naked: false,
     };
     asm_gen.allocate_registers();
     asm_gen.allocate_stack_slots();
@@ -209,6 +210,121 @@ pub(crate) fn codegen_inline_asm_inner<'tcx>(
     call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
 }
 
+pub(crate) fn codegen_naked_asm<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    cx: &mut crate::CodegenCx,
+    module: &mut dyn Module,
+    instance: Instance<'tcx>,
+    span: Span,
+    symbol_name: &str,
+    template: &[InlineAsmTemplatePiece],
+    operands: &[InlineAsmOperand<'tcx>],
+    options: InlineAsmOptions,
+) {
+    // FIXME add .eh_frame unwind info directives
+
+    let operands = operands
+        .iter()
+        .map(|operand| match *operand {
+            InlineAsmOperand::In { .. }
+            | InlineAsmOperand::Out { .. }
+            | InlineAsmOperand::InOut { .. } => {
+                span_bug!(span, "invalid operand type for naked asm")
+            }
+            InlineAsmOperand::Const { ref value } => {
+                let cv = instance.instantiate_mir_and_normalize_erasing_regions(
+                    tcx,
+                    ty::ParamEnv::reveal_all(),
+                    ty::EarlyBinder::bind(value.const_),
+                );
+                let const_value = cv
+                    .eval(tcx, ty::ParamEnv::reveal_all(), value.span)
+                    .expect("erroneous constant missed by mono item collection");
+
+                let value = rustc_codegen_ssa::common::asm_const_to_str(
+                    tcx,
+                    span,
+                    const_value,
+                    RevealAllLayoutCx(tcx).layout_of(cv.ty()),
+                );
+                CInlineAsmOperand::Const { value }
+            }
+            InlineAsmOperand::SymFn { ref value } => {
+                if cfg!(not(feature = "inline_asm_sym")) {
+                    tcx.dcx()
+                        .span_err(span, "asm! and global_asm! sym operands are not yet supported");
+                }
+
+                let const_ = instance.instantiate_mir_and_normalize_erasing_regions(
+                    tcx,
+                    ty::ParamEnv::reveal_all(),
+                    ty::EarlyBinder::bind(value.const_),
+                );
+                if let ty::FnDef(def_id, args) = *const_.ty().kind() {
+                    let instance = ty::Instance::resolve_for_fn_ptr(
+                        tcx,
+                        ty::ParamEnv::reveal_all(),
+                        def_id,
+                        args,
+                    )
+                    .unwrap();
+                    let symbol = tcx.symbol_name(instance);
+
+                    // Pass a wrapper rather than the function itself as the function itself may not
+                    // be exported from the main codegen unit and may thus be unreachable from the
+                    // object file created by an external assembler.
+                    let inline_asm_index = cx.inline_asm_index.get();
+                    cx.inline_asm_index.set(inline_asm_index + 1);
+                    let wrapper_name = format!(
+                        "__inline_asm_{}_wrapper_n{}",
+                        cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
+                        inline_asm_index
+                    );
+                    let sig =
+                        get_function_sig(tcx, module.target_config().default_call_conv, instance);
+                    create_wrapper_function(
+                        module,
+                        &mut cx.unwind_context,
+                        sig,
+                        &wrapper_name,
+                        symbol.name,
+                    );
+
+                    CInlineAsmOperand::Symbol { symbol: wrapper_name }
+                } else {
+                    span_bug!(span, "invalid type for asm sym (fn)");
+                }
+            }
+            InlineAsmOperand::SymStatic { def_id } => {
+                assert!(tcx.is_static(def_id));
+                let instance = Instance::mono(tcx, def_id);
+                CInlineAsmOperand::Symbol { symbol: tcx.symbol_name(instance).name.to_owned() }
+            }
+            InlineAsmOperand::Label { .. } => {
+                span_bug!(span, "asm! label operands are not yet supported");
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let asm_gen = InlineAssemblyGenerator {
+        tcx,
+        arch: tcx.sess.asm_arch.unwrap(),
+        enclosing_def_id: instance.def_id(),
+        template,
+        operands: &operands,
+        options,
+        registers: Vec::new(),
+        stack_slots_clobber: Vec::new(),
+        stack_slots_input: Vec::new(),
+        stack_slots_output: Vec::new(),
+        stack_slot_size: Size::from_bytes(0),
+        is_naked: true,
+    };
+
+    let generated_asm = asm_gen.generate_asm_wrapper(symbol_name);
+    cx.global_asm.push_str(&generated_asm);
+}
+
 struct InlineAssemblyGenerator<'a, 'tcx> {
     tcx: TyCtxt<'tcx>,
     arch: InlineAsmArch,
@@ -221,10 +337,13 @@ struct InlineAssemblyGenerator<'a, 'tcx> {
     stack_slots_input: Vec<Option<Size>>,
     stack_slots_output: Vec<Option<Size>>,
     stack_slot_size: Size,
+    is_naked: bool,
 }
 
 impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
     fn allocate_registers(&mut self) {
+        assert!(!self.is_naked);
+
         let sess = self.tcx.sess;
         let map = allocatable_registers(
             self.arch,
@@ -348,6 +467,8 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
     }
 
     fn allocate_stack_slots(&mut self) {
+        assert!(!self.is_naked);
+
         let mut slot_size = Size::from_bytes(0);
         let mut slots_clobber = vec![None; self.operands.len()];
         let mut slots_input = vec![None; self.operands.len()];
@@ -468,30 +589,32 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
         if is_x86 {
             generated_asm.push_str(".intel_syntax noprefix\n");
         }
-        Self::prologue(&mut generated_asm, self.arch);
+        if !self.is_naked {
+            Self::prologue(&mut generated_asm, self.arch);
+
+            // Save clobbered registers
+            if !self.options.contains(InlineAsmOptions::NORETURN) {
+                for (reg, slot) in self
+                    .registers
+                    .iter()
+                    .zip(self.stack_slots_clobber.iter().copied())
+                    .filter_map(|(r, s)| r.zip(s))
+                {
+                    Self::save_register(&mut generated_asm, self.arch, reg, slot);
+                }
+            }
 
-        // Save clobbered registers
-        if !self.options.contains(InlineAsmOptions::NORETURN) {
+            // Write input registers
             for (reg, slot) in self
                 .registers
                 .iter()
-                .zip(self.stack_slots_clobber.iter().copied())
+                .zip(self.stack_slots_input.iter().copied())
                 .filter_map(|(r, s)| r.zip(s))
             {
-                Self::save_register(&mut generated_asm, self.arch, reg, slot);
+                Self::restore_register(&mut generated_asm, self.arch, reg, slot);
             }
         }
 
-        // Write input registers
-        for (reg, slot) in self
-            .registers
-            .iter()
-            .zip(self.stack_slots_input.iter().copied())
-            .filter_map(|(r, s)| r.zip(s))
-        {
-            Self::restore_register(&mut generated_asm, self.arch, reg, slot);
-        }
-
         if is_x86 && self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
             generated_asm.push_str(".att_syntax\n");
         }
@@ -553,30 +676,32 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
             generated_asm.push_str(".intel_syntax noprefix\n");
         }
 
-        if !self.options.contains(InlineAsmOptions::NORETURN) {
-            // Read output registers
-            for (reg, slot) in self
-                .registers
-                .iter()
-                .zip(self.stack_slots_output.iter().copied())
-                .filter_map(|(r, s)| r.zip(s))
-            {
-                Self::save_register(&mut generated_asm, self.arch, reg, slot);
-            }
+        if !self.is_naked {
+            if !self.options.contains(InlineAsmOptions::NORETURN) {
+                // Read output registers
+                for (reg, slot) in self
+                    .registers
+                    .iter()
+                    .zip(self.stack_slots_output.iter().copied())
+                    .filter_map(|(r, s)| r.zip(s))
+                {
+                    Self::save_register(&mut generated_asm, self.arch, reg, slot);
+                }
 
-            // Restore clobbered registers
-            for (reg, slot) in self
-                .registers
-                .iter()
-                .zip(self.stack_slots_clobber.iter().copied())
-                .filter_map(|(r, s)| r.zip(s))
-            {
-                Self::restore_register(&mut generated_asm, self.arch, reg, slot);
-            }
+                // Restore clobbered registers
+                for (reg, slot) in self
+                    .registers
+                    .iter()
+                    .zip(self.stack_slots_clobber.iter().copied())
+                    .filter_map(|(r, s)| r.zip(s))
+                {
+                    Self::restore_register(&mut generated_asm, self.arch, reg, slot);
+                }
 
-            Self::epilogue(&mut generated_asm, self.arch);
-        } else {
-            Self::epilogue_noreturn(&mut generated_asm, self.arch);
+                Self::epilogue(&mut generated_asm, self.arch);
+            } else {
+                Self::epilogue_noreturn(&mut generated_asm, self.arch);
+            }
         }
 
         if is_x86 {
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
index 8df83c706a1..27b55ecc72e 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -374,6 +374,21 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 }
             }
         }
+        "llvm.x86.avx2.permd" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32
+            intrinsic_args!(fx, args => (a, idx); intrinsic);
+
+            for j in 0..=7 {
+                let index = idx.value_typed_lane(fx, fx.tcx.types.u32, j).load_scalar(fx);
+                let index = fx.bcx.ins().uextend(fx.pointer_type, index);
+                let value = a.value_lane_dyn(fx, index).load_scalar(fx);
+                ret.place_typed_lane(fx, fx.tcx.types.u32, j).to_ptr().store(
+                    fx,
+                    value,
+                    MemFlags::trusted(),
+                );
+            }
+        }
         "llvm.x86.avx2.vperm2i128"
         | "llvm.x86.avx.vperm2f128.ps.256"
         | "llvm.x86.avx.vperm2f128.pd.256" => {
@@ -832,6 +847,43 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             }
         }
 
+        "llvm.x86.sse42.crc32.32.8"
+        | "llvm.x86.sse42.crc32.32.16"
+        | "llvm.x86.sse42.crc32.32.32"
+        | "llvm.x86.sse42.crc32.64.64" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1419&text=_mm_crc32_u32
+            intrinsic_args!(fx, args => (crc, v); intrinsic);
+
+            let crc = crc.load_scalar(fx);
+            let v = v.load_scalar(fx);
+
+            let asm = match intrinsic {
+                "llvm.x86.sse42.crc32.32.8" => "crc32 eax, dl",
+                "llvm.x86.sse42.crc32.32.16" => "crc32 eax, dx",
+                "llvm.x86.sse42.crc32.32.32" => "crc32 eax, edx",
+                "llvm.x86.sse42.crc32.64.64" => "crc32 rax, rdx",
+                _ => unreachable!(),
+            };
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(asm.to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+                        _late: true,
+                        in_value: crc,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+                        value: v,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
         "llvm.x86.sse42.pcmpestri128" => {
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939
             intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic);
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
index e72951b6f34..39bbad16b0c 100644
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -331,9 +331,9 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Arc<dyn TargetIs
                     sess.dcx().fatal(format!("can't compile for {}: {}", target_triple, err));
                 });
             if target_triple.architecture == target_lexicon::Architecture::X86_64 {
-                // Don't use "haswell" as the default, as it implies `has_lzcnt`.
-                // macOS CI is still at Ivy Bridge EP, so `lzcnt` is interpreted as `bsr`.
-                builder.enable("nehalem").unwrap();
+                // Only set the target cpu on x86_64 as Cranelift is missing
+                // the target cpu list for most other targets.
+                builder.enable(sess.target.cpu.as_ref()).unwrap();
             }
             builder
         }
diff --git a/compiler/rustc_codegen_cranelift/src/main_shim.rs b/compiler/rustc_codegen_cranelift/src/main_shim.rs
index 1f20ec42ddb..f9a729618a5 100644
--- a/compiler/rustc_codegen_cranelift/src/main_shim.rs
+++ b/compiler/rustc_codegen_cranelift/src/main_shim.rs
@@ -29,7 +29,7 @@ pub(crate) fn maybe_create_entry_wrapper(
 
     if main_def_id.is_local() {
         let instance = Instance::mono(tcx, main_def_id).polymorphize(tcx);
-        if !is_jit && module.get_name(tcx.symbol_name(instance).name).is_none() {
+        if module.get_name(tcx.symbol_name(instance).name).is_none() {
             return;
         }
     } else if !is_primary_cgu {
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index a11abd0c0e9..b6d6d211e65 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -317,14 +317,6 @@ impl<'tcx> CValue<'tcx> {
 
         let clif_ty = fx.clif_type(layout.ty).unwrap();
 
-        if let ty::Bool = layout.ty.kind() {
-            assert!(
-                const_val == ty::ScalarInt::FALSE || const_val == ty::ScalarInt::TRUE,
-                "Invalid bool 0x{:032X}",
-                const_val
-            );
-        }
-
         let val = match layout.ty.kind() {
             ty::Uint(UintTy::U128) | ty::Int(IntTy::I128) => {
                 let const_val = const_val.assert_bits(layout.size);