Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/comments.rs | 97
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/mod.rs | 700
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs | 303
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/returning.rs | 130
-rw-r--r--  compiler/rustc_codegen_cranelift/src/allocator.rs | 104
-rw-r--r--  compiler/rustc_codegen_cranelift/src/analyze.rs | 39
-rw-r--r--  compiler/rustc_codegen_cranelift/src/archive.rs | 25
-rw-r--r--  compiler/rustc_codegen_cranelift/src/base.rs | 1073
-rw-r--r--  compiler/rustc_codegen_cranelift/src/cast.rs | 168
-rw-r--r--  compiler/rustc_codegen_cranelift/src/codegen_i128.rs | 141
-rw-r--r--  compiler/rustc_codegen_cranelift/src/common.rs | 518
-rw-r--r--  compiler/rustc_codegen_cranelift/src/compiler_builtins.rs | 76
-rw-r--r--  compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs | 206
-rw-r--r--  compiler/rustc_codegen_cranelift/src/config.rs | 109
-rw-r--r--  compiler/rustc_codegen_cranelift/src/constant.rs | 574
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs | 203
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs | 172
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs | 337
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/object.rs | 85
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/types.rs | 204
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs | 142
-rw-r--r--  compiler/rustc_codegen_cranelift/src/discriminant.rs | 214
-rw-r--r--  compiler/rustc_codegen_cranelift/src/driver/aot.rs | 712
-rw-r--r--  compiler/rustc_codegen_cranelift/src/driver/jit.rs | 417
-rw-r--r--  compiler/rustc_codegen_cranelift/src/driver/mod.rs | 84
-rw-r--r--  compiler/rustc_codegen_cranelift/src/global_asm.rs | 225
-rw-r--r--  compiler/rustc_codegen_cranelift/src/inline_asm.rs | 946
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs | 85
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs | 338
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs | 1574
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs | 1281
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs | 1162
-rw-r--r--  compiler/rustc_codegen_cranelift/src/lib.rs | 352
-rw-r--r--  compiler/rustc_codegen_cranelift/src/linkage.rs | 36
-rw-r--r--  compiler/rustc_codegen_cranelift/src/main_shim.rs | 176
-rw-r--r--  compiler/rustc_codegen_cranelift/src/num.rs | 464
-rw-r--r--  compiler/rustc_codegen_cranelift/src/optimize/mod.rs | 3
-rw-r--r--  compiler/rustc_codegen_cranelift/src/optimize/peephole.rs | 47
-rw-r--r--  compiler/rustc_codegen_cranelift/src/pointer.rs | 128
-rw-r--r--  compiler/rustc_codegen_cranelift/src/pretty_clif.rs | 281
-rw-r--r--  compiler/rustc_codegen_cranelift/src/toolchain.rs | 29
-rw-r--r--  compiler/rustc_codegen_cranelift/src/trap.rs | 38
-rw-r--r--  compiler/rustc_codegen_cranelift/src/unsize.rs | 313
-rw-r--r--  compiler/rustc_codegen_cranelift/src/value_and_place.rs | 1005
-rw-r--r--  compiler/rustc_codegen_cranelift/src/vtable.rs | 101
45 files changed, 15417 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/abi/comments.rs b/compiler/rustc_codegen_cranelift/src/abi/comments.rs
new file mode 100644
index 00000000000..a318cae1722
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/abi/comments.rs
@@ -0,0 +1,97 @@
+//! Annotate the clif ir with comments describing how arguments are passed into the current function
+//! and where all locals are stored.
+
+use std::borrow::Cow;
+
+use rustc_target::abi::call::PassMode;
+
+use crate::prelude::*;
+
+pub(super) fn add_args_header_comment(fx: &mut FunctionCx<'_, '_, '_>) {
+    if fx.clif_comments.enabled() {
+        fx.add_global_comment(
+            "kind  loc.idx   param    pass mode                            ty".to_string(),
+        );
+    }
+}
+
+pub(super) fn add_arg_comment<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    kind: &str,
+    local: Option<mir::Local>,
+    local_field: Option<usize>,
+    params: &[Value],
+    arg_abi_mode: &PassMode,
+    arg_layout: TyAndLayout<'tcx>,
+) {
+    if !fx.clif_comments.enabled() {
+        return;
+    }
+
+    let local = if let Some(local) = local {
+        Cow::Owned(format!("{:?}", local))
+    } else {
+        Cow::Borrowed("???")
+    };
+    let local_field = if let Some(local_field) = local_field {
+        Cow::Owned(format!(".{}", local_field))
+    } else {
+        Cow::Borrowed("")
+    };
+
+    let params = match params {
+        [] => Cow::Borrowed("-"),
+        [param] => Cow::Owned(format!("= {:?}", param)),
+        [param_a, param_b] => Cow::Owned(format!("= {:?},{:?}", param_a, param_b)),
+        params => Cow::Owned(format!(
+            "= {}",
+            params.iter().map(ToString::to_string).collect::<Vec<_>>().join(",")
+        )),
+    };
+
+    let pass_mode = format!("{:?}", arg_abi_mode);
+    fx.add_global_comment(format!(
+        "{kind:5}{local:>3}{local_field:<5} {params:10} {pass_mode:36} {ty:?}",
+        kind = kind,
+        local = local,
+        local_field = local_field,
+        params = params,
+        pass_mode = pass_mode,
+        ty = arg_layout.ty,
+    ));
+}
+
+pub(super) fn add_locals_header_comment(fx: &mut FunctionCx<'_, '_, '_>) {
+    if fx.clif_comments.enabled() {
+        fx.add_global_comment(String::new());
+        fx.add_global_comment(
+            "kind  local ty                              size align (abi,pref)".to_string(),
+        );
+    }
+}
+
+pub(super) fn add_local_place_comments<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    place: CPlace<'tcx>,
+    local: Local,
+) {
+    if !fx.clif_comments.enabled() {
+        return;
+    }
+    let TyAndLayout { ty, layout } = place.layout();
+    let rustc_target::abi::LayoutS { size, align, .. } = layout.0.0;
+
+    let (kind, extra) = place.debug_comment();
+
+    fx.add_global_comment(format!(
+        "{:<5} {:5} {:30} {:4}b {}, {}{}{}",
+        kind,
+        format!("{:?}", local),
+        format!("{:?}", ty),
+        size.bytes(),
+        align.abi.bytes(),
+        align.pref.bytes(),
+        if extra.is_empty() { "" } else { "              " },
+        extra,
+    ));
+}
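The comment machinery above boils down to careful `format!` width handling, with `Cow` avoiding an allocation in the common placeholder cases. A minimal standalone sketch of the same column-aligned output (the sample rows and type names here are invented for illustration and are not from the diff):

```rust
use std::borrow::Cow;

fn main() {
    // Header line matching the one emitted by add_args_header_comment.
    println!("kind  loc.idx   param    pass mode                            ty");

    // Sample (kind, local, field, params, pass mode, ty) rows, made up here.
    let rows: [(&str, Option<u32>, Option<usize>, &str, &str, &str); 2] = [
        ("ret", Some(0), None, "-", "Indirect { .. }", "MyBigStruct"),
        ("arg", Some(1), Some(0), "= v0", "Direct(ArgAttributes)", "i64"),
    ];

    for (kind, local, field, params, pass_mode, ty) in rows {
        // Cow avoids allocating when the borrowed placeholder suffices.
        let local: Cow<'_, str> =
            local.map_or(Cow::Borrowed("???"), |l| Cow::Owned(format!("_{l}")));
        let field: Cow<'_, str> =
            field.map_or(Cow::Borrowed(""), |f| Cow::Owned(format!(".{f}")));
        // Width specifiers keep every row aligned under the header.
        println!("{kind:5}{local:>3}{field:<5} {params:10} {pass_mode:36} {ty}");
    }
}
```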
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
new file mode 100644
index 00000000000..4bcef15ad04
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -0,0 +1,700 @@
+//! Handling of everything related to the calling convention. Also fills `fx.local_map`.
+
+mod comments;
+mod pass_mode;
+mod returning;
+
+use std::borrow::Cow;
+
+use cranelift_codegen::ir::SigRef;
+use cranelift_codegen::isa::CallConv;
+use cranelift_module::ModuleError;
+use rustc_codegen_ssa::errors::CompilerBuiltinsCannotCall;
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
+use rustc_middle::ty::layout::FnAbiOf;
+use rustc_middle::ty::print::with_no_trimmed_paths;
+use rustc_middle::ty::TypeVisitableExt;
+use rustc_monomorphize::is_call_from_compiler_builtins_to_upstream_monomorphization;
+use rustc_session::Session;
+use rustc_span::source_map::Spanned;
+use rustc_target::abi::call::{Conv, FnAbi};
+use rustc_target::spec::abi::Abi;
+
+use self::pass_mode::*;
+pub(crate) use self::returning::codegen_return;
+use crate::prelude::*;
+
+fn clif_sig_from_fn_abi<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    default_call_conv: CallConv,
+    fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
+) -> Signature {
+    let call_conv = conv_to_call_conv(tcx.sess, fn_abi.conv, default_call_conv);
+
+    let inputs = fn_abi.args.iter().flat_map(|arg_abi| arg_abi.get_abi_param(tcx).into_iter());
+
+    let (return_ptr, returns) = fn_abi.ret.get_abi_return(tcx);
+    // Sometimes the first param is a pointer to the place where the return value needs to be stored.
+    let params: Vec<_> = return_ptr.into_iter().chain(inputs).collect();
+
+    Signature { params, returns, call_conv }
+}
+
+pub(crate) fn conv_to_call_conv(sess: &Session, c: Conv, default_call_conv: CallConv) -> CallConv {
+    match c {
+        Conv::Rust | Conv::C => default_call_conv,
+        Conv::Cold | Conv::PreserveMost | Conv::PreserveAll => CallConv::Cold,
+        Conv::X86_64SysV => CallConv::SystemV,
+        Conv::X86_64Win64 => CallConv::WindowsFastcall,
+
+        // Should already get a back compat warning
+        Conv::X86Fastcall | Conv::X86Stdcall | Conv::X86ThisCall | Conv::X86VectorCall => {
+            default_call_conv
+        }
+
+        Conv::X86Intr | Conv::RiscvInterrupt { .. } => {
+            sess.dcx().fatal(format!("interrupt call conv {c:?} not yet implemented"))
+        }
+
+        Conv::ArmAapcs => sess.dcx().fatal("aapcs call conv not yet implemented"),
+        Conv::CCmseNonSecureCall => {
+            sess.dcx().fatal("C-cmse-nonsecure-call call conv is not yet implemented");
+        }
+
+        Conv::Msp430Intr | Conv::PtxKernel | Conv::AvrInterrupt | Conv::AvrNonBlockingInterrupt => {
+            unreachable!("tried to use {c:?} call conv which only exists on an unsupported target");
+        }
+    }
+}
+
+pub(crate) fn get_function_sig<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    default_call_conv: CallConv,
+    inst: Instance<'tcx>,
+) -> Signature {
+    assert!(!inst.args.has_infer());
+    clif_sig_from_fn_abi(
+        tcx,
+        default_call_conv,
+        &RevealAllLayoutCx(tcx).fn_abi_of_instance(inst, ty::List::empty()),
+    )
+}
+
+/// Instance must be monomorphized
+pub(crate) fn import_function<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    module: &mut dyn Module,
+    inst: Instance<'tcx>,
+) -> FuncId {
+    let name = tcx.symbol_name(inst).name;
+    let sig = get_function_sig(tcx, module.target_config().default_call_conv, inst);
+    match module.declare_function(name, Linkage::Import, &sig) {
+        Ok(func_id) => func_id,
+        Err(ModuleError::IncompatibleDeclaration(_)) => tcx.dcx().fatal(format!(
+            "attempt to declare `{name}` as function, but it was already declared as static"
+        )),
+        Err(ModuleError::IncompatibleSignature(_, prev_sig, new_sig)) => tcx.dcx().fatal(format!(
+            "attempt to declare `{name}` with signature {new_sig:?}, \
+             but it was already declared with signature {prev_sig:?}"
+        )),
+        Err(err) => Err::<_, _>(err).unwrap(),
+    }
+}
+
+impl<'tcx> FunctionCx<'_, '_, 'tcx> {
+    /// Instance must be monomorphized
+    pub(crate) fn get_function_ref(&mut self, inst: Instance<'tcx>) -> FuncRef {
+        let func_id = import_function(self.tcx, self.module, inst);
+        let func_ref = self.module.declare_func_in_func(func_id, &mut self.bcx.func);
+
+        if self.clif_comments.enabled() {
+            self.add_comment(func_ref, format!("{:?}", inst));
+        }
+
+        func_ref
+    }
+
+    pub(crate) fn lib_call(
+        &mut self,
+        name: &str,
+        params: Vec<AbiParam>,
+        returns: Vec<AbiParam>,
+        args: &[Value],
+    ) -> Cow<'_, [Value]> {
+        if self.tcx.sess.target.is_like_windows {
+            let (mut params, mut args): (Vec<_>, Vec<_>) = params
+                .into_iter()
+                .zip(args)
+                .map(|(param, &arg)| {
+                    if param.value_type == types::I128 {
+                        let arg_ptr = self.create_stack_slot(16, 16);
+                        arg_ptr.store(self, arg, MemFlags::trusted());
+                        (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self))
+                    } else {
+                        (param, arg)
+                    }
+                })
+                .unzip();
+
+            let indirect_ret_val = returns.len() == 1 && returns[0].value_type == types::I128;
+
+            if indirect_ret_val {
+                params.insert(0, AbiParam::new(self.pointer_type));
+                let ret_ptr = self.create_stack_slot(16, 16);
+                args.insert(0, ret_ptr.get_addr(self));
+                self.lib_call_unadjusted(name, params, vec![], &args);
+                return Cow::Owned(vec![ret_ptr.load(self, types::I128, MemFlags::trusted())]);
+            } else {
+                return self.lib_call_unadjusted(name, params, returns, &args);
+            }
+        }
+
+        self.lib_call_unadjusted(name, params, returns, args)
+    }
+
+    pub(crate) fn lib_call_unadjusted(
+        &mut self,
+        name: &str,
+        params: Vec<AbiParam>,
+        returns: Vec<AbiParam>,
+        args: &[Value],
+    ) -> Cow<'_, [Value]> {
+        let sig = Signature { params, returns, call_conv: self.target_config.default_call_conv };
+        let func_id = self.module.declare_function(name, Linkage::Import, &sig).unwrap();
+        let func_ref = self.module.declare_func_in_func(func_id, &mut self.bcx.func);
+        if self.clif_comments.enabled() {
+            self.add_comment(func_ref, format!("{:?}", name));
+        }
+        let call_inst = self.bcx.ins().call(func_ref, args);
+        if self.clif_comments.enabled() {
+            self.add_comment(call_inst, format!("lib_call {}", name));
+        }
+        let results = self.bcx.inst_results(call_inst);
+        assert!(results.len() <= 2, "{}", results.len());
+        Cow::Borrowed(results)
+    }
+}
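The 64-bit Windows calling convention has no native i128 support, so `lib_call` spills each i128 argument to a 16-byte stack slot and passes a pointer, and returns an i128 through a prepended out-pointer. A toy model of just that signature rewriting (the `Ty` enum here is a made-up stand-in for Cranelift's types, not part of the diff):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Ty {
    I64,
    I128,
    Ptr,
}

// Sketch of the signature adjustment lib_call performs on Windows targets.
fn adjust_for_windows(params: Vec<Ty>, returns: Vec<Ty>) -> (Vec<Ty>, Vec<Ty>) {
    // Each i128 argument is spilled to a 16-byte stack slot and passed by pointer.
    let mut params: Vec<Ty> =
        params.into_iter().map(|p| if p == Ty::I128 { Ty::Ptr } else { p }).collect();
    // An i128 return value comes back through a pointer prepended to the params.
    if returns.len() == 1 && returns[0] == Ty::I128 {
        params.insert(0, Ty::Ptr);
        (params, vec![])
    } else {
        (params, returns)
    }
}

fn main() {
    let (params, returns) = adjust_for_windows(vec![Ty::I128, Ty::I64], vec![Ty::I128]);
    assert_eq!(params, [Ty::Ptr, Ty::Ptr, Ty::I64]);
    assert!(returns.is_empty());
    println!("{params:?} -> {returns:?}");
}
```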
+
+/// Make a [`CPlace`] capable of holding a value of the specified type.
+fn make_local_place<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    local: Local,
+    layout: TyAndLayout<'tcx>,
+    is_ssa: bool,
+) -> CPlace<'tcx> {
+    if layout.is_unsized() {
+        fx.tcx.dcx().span_fatal(
+            fx.mir.local_decls[local].source_info.span,
+            "unsized locals are not yet supported",
+        );
+    }
+    let place = if is_ssa {
+        if let rustc_target::abi::Abi::ScalarPair(_, _) = layout.abi {
+            CPlace::new_var_pair(fx, local, layout)
+        } else {
+            CPlace::new_var(fx, local, layout)
+        }
+    } else {
+        CPlace::new_stack_slot(fx, layout)
+    };
+
+    self::comments::add_local_place_comments(fx, place, local);
+
+    place
+}
+
+pub(crate) fn codegen_fn_prelude<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, start_block: Block) {
+    fx.bcx.append_block_params_for_function_params(start_block);
+
+    fx.bcx.switch_to_block(start_block);
+    fx.bcx.ins().nop();
+
+    let ssa_analyzed = crate::analyze::analyze(fx);
+
+    self::comments::add_args_header_comment(fx);
+
+    let mut block_params_iter = fx.bcx.func.dfg.block_params(start_block).to_vec().into_iter();
+    let ret_place =
+        self::returning::codegen_return_param(fx, &ssa_analyzed, &mut block_params_iter);
+    assert_eq!(fx.local_map.push(ret_place), RETURN_PLACE);
+
+    // None means the argument is ignored at the ABI level (PassMode::Ignore)
+    enum ArgKind<'tcx> {
+        Normal(Option<CValue<'tcx>>),
+        Spread(Vec<Option<CValue<'tcx>>>),
+    }
+
+    // FIXME implement variadics in cranelift
+    if fx.fn_abi.c_variadic {
+        fx.tcx.dcx().span_fatal(
+            fx.mir.span,
+            "Defining variadic functions is not yet supported by Cranelift",
+        );
+    }
+
+    let mut arg_abis_iter = fx.fn_abi.args.iter();
+
+    let func_params = fx
+        .mir
+        .args_iter()
+        .map(|local| {
+            let arg_ty = fx.monomorphize(fx.mir.local_decls[local].ty);
+
+            // Adapted from https://github.com/rust-lang/rust/blob/145155dc96757002c7b2e9de8489416e2fdbbd57/src/librustc_codegen_llvm/mir/mod.rs#L442-L482
+            if Some(local) == fx.mir.spread_arg {
+                // This argument (e.g. the last argument in the "rust-call" ABI)
+                // is a tuple that was spread at the ABI level and now we have
+                // to reconstruct it into a tuple local variable, from multiple
+                // individual function arguments.
+
+                let tupled_arg_tys = match arg_ty.kind() {
+                    ty::Tuple(ref tys) => tys,
+                    _ => bug!("spread argument isn't a tuple?! but {:?}", arg_ty),
+                };
+
+                let mut params = Vec::new();
+                for (i, _arg_ty) in tupled_arg_tys.iter().enumerate() {
+                    let arg_abi = arg_abis_iter.next().unwrap();
+                    let param =
+                        cvalue_for_param(fx, Some(local), Some(i), arg_abi, &mut block_params_iter);
+                    params.push(param);
+                }
+
+                (local, ArgKind::Spread(params), arg_ty)
+            } else {
+                let arg_abi = arg_abis_iter.next().unwrap();
+                let param =
+                    cvalue_for_param(fx, Some(local), None, arg_abi, &mut block_params_iter);
+                (local, ArgKind::Normal(param), arg_ty)
+            }
+        })
+        .collect::<Vec<(Local, ArgKind<'tcx>, Ty<'tcx>)>>();
+
+    assert!(fx.caller_location.is_none());
+    if fx.instance.def.requires_caller_location(fx.tcx) {
+        // Store caller location for `#[track_caller]`.
+        let arg_abi = arg_abis_iter.next().unwrap();
+        fx.caller_location =
+            Some(cvalue_for_param(fx, None, None, arg_abi, &mut block_params_iter).unwrap());
+    }
+
+    assert!(arg_abis_iter.next().is_none(), "ArgAbi left behind");
+    assert!(block_params_iter.next().is_none(), "arg_value left behind");
+
+    self::comments::add_locals_header_comment(fx);
+
+    for (local, arg_kind, ty) in func_params {
+        // While this is normally an optimization to prevent an unnecessary copy when an argument is
+        // not mutated by the current function, this is necessary to support unsized arguments.
+        if let ArgKind::Normal(Some(val)) = arg_kind {
+            if let Some((addr, meta)) = val.try_to_ptr() {
+                // Ownership of the value at the backing storage for an argument is passed to the
+                // callee per the ABI, so it is fine to borrow the backing storage of this argument
+                // to prevent a copy.
+
+                let place = if let Some(meta) = meta {
+                    CPlace::for_ptr_with_extra(addr, meta, val.layout())
+                } else {
+                    CPlace::for_ptr(addr, val.layout())
+                };
+
+                self::comments::add_local_place_comments(fx, place, local);
+
+                assert_eq!(fx.local_map.push(place), local);
+                continue;
+            }
+        }
+
+        let layout = fx.layout_of(ty);
+        let is_ssa = ssa_analyzed[local].is_ssa(fx, ty);
+        let place = make_local_place(fx, local, layout, is_ssa);
+        assert_eq!(fx.local_map.push(place), local);
+
+        match arg_kind {
+            ArgKind::Normal(param) => {
+                if let Some(param) = param {
+                    place.write_cvalue(fx, param);
+                }
+            }
+            ArgKind::Spread(params) => {
+                for (i, param) in params.into_iter().enumerate() {
+                    if let Some(param) = param {
+                        place.place_field(fx, FieldIdx::new(i)).write_cvalue(fx, param);
+                    }
+                }
+            }
+        }
+    }
+
+    for local in fx.mir.vars_and_temps_iter() {
+        let ty = fx.monomorphize(fx.mir.local_decls[local].ty);
+        let layout = fx.layout_of(ty);
+
+        let is_ssa = ssa_analyzed[local].is_ssa(fx, ty);
+
+        let place = make_local_place(fx, local, layout, is_ssa);
+        assert_eq!(fx.local_map.push(place), local);
+    }
+
+    fx.bcx.ins().jump(*fx.block_map.get(START_BLOCK).unwrap(), &[]);
+}
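One subtlety in the prelude is `spread_arg`: for the "rust-call" ABI the trailing tuple argument arrives as individual ABI parameters and has to be reassembled into a tuple local. In surface Rust the correspondence looks roughly like this (an illustrative sketch, not compiler code):

```rust
// A closure's arguments travel as one tuple that the ABI spreads out; the
// prelude does the inverse and rebuilds the tuple local from the parts.
fn call_spread<F: Fn(u8, u16) -> u32>(f: F, spread: (u8, u16)) -> u32 {
    // Fn::call receives `spread` whole; codegen receives `spread.0` and
    // `spread.1` as separate parameters and writes them back into a tuple.
    f(spread.0, spread.1)
}

fn main() {
    println!("{}", call_spread(|a, b| u32::from(a) + u32::from(b), (1, 2)));
}
```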
+
+struct CallArgument<'tcx> {
+    value: CValue<'tcx>,
+    is_owned: bool,
+}
+
+// FIXME avoid intermediate `CValue` before calling `adjust_arg_for_abi`
+fn codegen_call_argument_operand<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    operand: &Operand<'tcx>,
+) -> CallArgument<'tcx> {
+    CallArgument {
+        value: codegen_operand(fx, operand),
+        is_owned: matches!(operand, Operand::Move(_)),
+    }
+}
+
+pub(crate) fn codegen_terminator_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    source_info: mir::SourceInfo,
+    func: &Operand<'tcx>,
+    args: &[Spanned<Operand<'tcx>>],
+    destination: Place<'tcx>,
+    target: Option<BasicBlock>,
+) {
+    let func = codegen_operand(fx, func);
+    let fn_sig = func.layout().ty.fn_sig(fx.tcx);
+
+    let ret_place = codegen_place(fx, destination);
+
+    // Handle special calls like intrinsics and empty drop glue.
+    let instance = if let ty::FnDef(def_id, fn_args) = *func.layout().ty.kind() {
+        let instance =
+            ty::Instance::expect_resolve(fx.tcx, ty::ParamEnv::reveal_all(), def_id, fn_args)
+                .polymorphize(fx.tcx);
+
+        if is_call_from_compiler_builtins_to_upstream_monomorphization(fx.tcx, instance) {
+            if target.is_some() {
+                let caller = with_no_trimmed_paths!(fx.tcx.def_path_str(fx.instance.def_id()));
+                let callee = with_no_trimmed_paths!(fx.tcx.def_path_str(def_id));
+                fx.tcx.dcx().emit_err(CompilerBuiltinsCannotCall { caller, callee });
+            } else {
+                fx.bcx.ins().trap(TrapCode::User(0));
+                return;
+            }
+        }
+
+        if fx.tcx.symbol_name(instance).name.starts_with("llvm.") {
+            crate::intrinsics::codegen_llvm_intrinsic_call(
+                fx,
+                &fx.tcx.symbol_name(instance).name,
+                fn_args,
+                args,
+                ret_place,
+                target,
+                source_info.span,
+            );
+            return;
+        }
+
+        match instance.def {
+            InstanceDef::Intrinsic(_) => {
+                match crate::intrinsics::codegen_intrinsic_call(
+                    fx,
+                    instance,
+                    args,
+                    ret_place,
+                    target,
+                    source_info,
+                ) {
+                    Ok(()) => return,
+                    Err(instance) => Some(instance),
+                }
+            }
+            InstanceDef::DropGlue(_, None) | ty::InstanceDef::AsyncDropGlueCtorShim(_, None) => {
+                // empty drop glue - a nop.
+                let dest = target.expect("Non terminating drop_in_place_real???");
+                let ret_block = fx.get_block(dest);
+                fx.bcx.ins().jump(ret_block, &[]);
+                return;
+            }
+            _ => Some(instance),
+        }
+    } else {
+        None
+    };
+
+    let extra_args = &args[fn_sig.inputs().skip_binder().len()..];
+    let extra_args = fx.tcx.mk_type_list_from_iter(
+        extra_args.iter().map(|op_arg| fx.monomorphize(op_arg.node.ty(fx.mir, fx.tcx))),
+    );
+    let fn_abi = if let Some(instance) = instance {
+        RevealAllLayoutCx(fx.tcx).fn_abi_of_instance(instance, extra_args)
+    } else {
+        RevealAllLayoutCx(fx.tcx).fn_abi_of_fn_ptr(fn_sig, extra_args)
+    };
+
+    let is_cold = if fn_sig.abi() == Abi::RustCold {
+        true
+    } else {
+        instance.is_some_and(|inst| {
+            fx.tcx.codegen_fn_attrs(inst.def_id()).flags.contains(CodegenFnAttrFlags::COLD)
+        })
+    };
+    if is_cold {
+        fx.bcx.set_cold_block(fx.bcx.current_block().unwrap());
+        if let Some(destination_block) = target {
+            fx.bcx.set_cold_block(fx.get_block(destination_block));
+        }
+    }
+
+    // Unpack arguments tuple for closures
+    let mut args = if fn_sig.abi() == Abi::RustCall {
+        let (self_arg, pack_arg) = match args {
+            [pack_arg] => (None, codegen_call_argument_operand(fx, &pack_arg.node)),
+            [self_arg, pack_arg] => (
+                Some(codegen_call_argument_operand(fx, &self_arg.node)),
+                codegen_call_argument_operand(fx, &pack_arg.node),
+            ),
+            _ => panic!("rust-call abi requires one or two arguments"),
+        };
+
+        let tupled_arguments = match pack_arg.value.layout().ty.kind() {
+            ty::Tuple(ref tupled_arguments) => tupled_arguments,
+            _ => bug!("argument to function with \"rust-call\" ABI is not a tuple"),
+        };
+
+        let mut args = Vec::with_capacity(1 + tupled_arguments.len());
+        args.extend(self_arg);
+        for i in 0..tupled_arguments.len() {
+            args.push(CallArgument {
+                value: pack_arg.value.value_field(fx, FieldIdx::new(i)),
+                is_owned: pack_arg.is_owned,
+            });
+        }
+        args
+    } else {
+        args.iter().map(|arg| codegen_call_argument_operand(fx, &arg.node)).collect::<Vec<_>>()
+    };
+
+    // Pass the caller location for `#[track_caller]`.
+    if instance.is_some_and(|inst| inst.def.requires_caller_location(fx.tcx)) {
+        let caller_location = fx.get_caller_location(source_info);
+        args.push(CallArgument { value: caller_location, is_owned: false });
+    }
+
+    let args = args;
+    assert_eq!(fn_abi.args.len(), args.len());
+
+    enum CallTarget {
+        Direct(FuncRef),
+        Indirect(SigRef, Value),
+    }
+
+    let (func_ref, first_arg_override) = match instance {
+        // Trait object call
+        Some(Instance { def: InstanceDef::Virtual(_, idx), .. }) => {
+            if fx.clif_comments.enabled() {
+                let nop_inst = fx.bcx.ins().nop();
+                fx.add_comment(
+                    nop_inst,
+                    format!("virtual call; self arg pass mode: {:?}", &fn_abi.args[0]),
+                );
+            }
+
+            let (ptr, method) = crate::vtable::get_ptr_and_method_ref(fx, args[0].value, idx);
+            let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi);
+            let sig = fx.bcx.import_signature(sig);
+
+            (CallTarget::Indirect(sig, method), Some(ptr.get_addr(fx)))
+        }
+
+        // Normal call
+        Some(instance) => {
+            let func_ref = fx.get_function_ref(instance);
+            (CallTarget::Direct(func_ref), None)
+        }
+
+        // Indirect call
+        None => {
+            if fx.clif_comments.enabled() {
+                let nop_inst = fx.bcx.ins().nop();
+                fx.add_comment(nop_inst, "indirect call");
+            }
+
+            let func = func.load_scalar(fx);
+            let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi);
+            let sig = fx.bcx.import_signature(sig);
+
+            (CallTarget::Indirect(sig, func), None)
+        }
+    };
+
+    self::returning::codegen_with_call_return_arg(fx, &fn_abi.ret, ret_place, |fx, return_ptr| {
+        let call_args = return_ptr
+            .into_iter()
+            .chain(first_arg_override.into_iter())
+            .chain(
+                args.into_iter()
+                    .enumerate()
+                    .skip(if first_arg_override.is_some() { 1 } else { 0 })
+                    .flat_map(|(i, arg)| {
+                        adjust_arg_for_abi(fx, arg.value, &fn_abi.args[i], arg.is_owned).into_iter()
+                    }),
+            )
+            .collect::<Vec<Value>>();
+
+        let call_inst = match func_ref {
+            CallTarget::Direct(func_ref) => fx.bcx.ins().call(func_ref, &call_args),
+            CallTarget::Indirect(sig, func_ptr) => {
+                fx.bcx.ins().call_indirect(sig, func_ptr, &call_args)
+            }
+        };
+
+        // FIXME find a cleaner way to support varargs
+        if fn_sig.c_variadic() {
+            if !matches!(fn_sig.abi(), Abi::C { .. }) {
+                fx.tcx.dcx().span_fatal(
+                    source_info.span,
+                    format!("Variadic call for non-C abi {:?}", fn_sig.abi()),
+                );
+            }
+            let sig_ref = fx.bcx.func.dfg.call_signature(call_inst).unwrap();
+            let abi_params = call_args
+                .into_iter()
+                .map(|arg| {
+                    let ty = fx.bcx.func.dfg.value_type(arg);
+                    if !ty.is_int() {
+                        // FIXME set %al to the upper bound on float args once floats are supported
+                        fx.tcx.dcx().span_fatal(
+                            source_info.span,
+                            format!("Non int ty {:?} for variadic call", ty),
+                        );
+                    }
+                    AbiParam::new(ty)
+                })
+                .collect::<Vec<AbiParam>>();
+            fx.bcx.func.dfg.signatures[sig_ref].params = abi_params;
+        }
+
+        call_inst
+    });
+
+    if let Some(dest) = target {
+        let ret_block = fx.get_block(dest);
+        fx.bcx.ins().jump(ret_block, &[]);
+    } else {
+        fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+    }
+}
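The caller-location handling above is the backend side of `#[track_caller]`: the callee receives a hidden trailing argument describing the call site. For reference, the user-visible behaviour this implements:

```rust
use std::panic::Location;

// The hidden caller-location argument that codegen_terminator_call appends is
// what makes Location::caller() report the call site rather than this body.
#[track_caller]
fn here() -> &'static Location<'static> {
    Location::caller()
}

fn main() {
    let loc = here();
    println!("called from {}:{}:{}", loc.file(), loc.line(), loc.column());
}
```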
+
+pub(crate) fn codegen_drop<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    source_info: mir::SourceInfo,
+    drop_place: CPlace<'tcx>,
+) {
+    let ty = drop_place.layout().ty;
+    let drop_instance = Instance::resolve_drop_in_place(fx.tcx, ty).polymorphize(fx.tcx);
+
+    if let ty::InstanceDef::DropGlue(_, None) | ty::InstanceDef::AsyncDropGlueCtorShim(_, None) =
+        drop_instance.def
+    {
+        // we don't actually need to drop anything
+    } else {
+        match ty.kind() {
+            ty::Dynamic(_, _, ty::Dyn) => {
+                // IN THIS ARM, WE HAVE:
+                // ty = *mut (dyn Trait)
+                // which is: exists<T> ( *mut T,    Vtable<T: Trait> )
+                //                       args[0]    args[1]
+                //
+                // args = ( Data, Vtable )
+                //                  |
+                //                  v
+                //                /-------\
+                //                | ...   |
+                //                \-------/
+                //
+                let (ptr, vtable) = drop_place.to_ptr_unsized();
+                let ptr = ptr.get_addr(fx);
+                let drop_fn = crate::vtable::drop_fn_of_obj(fx, vtable);
+
+                // FIXME(eddyb) perhaps move some of this logic into
+                // `Instance::resolve_drop_in_place`?
+                let virtual_drop = Instance {
+                    def: ty::InstanceDef::Virtual(drop_instance.def_id(), 0),
+                    args: drop_instance.args,
+                };
+                let fn_abi =
+                    RevealAllLayoutCx(fx.tcx).fn_abi_of_instance(virtual_drop, ty::List::empty());
+
+                let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi);
+                let sig = fx.bcx.import_signature(sig);
+                fx.bcx.ins().call_indirect(sig, drop_fn, &[ptr]);
+            }
+            ty::Dynamic(_, _, ty::DynStar) => {
+                // IN THIS ARM, WE HAVE:
+                // ty = *mut (dyn* Trait)
+                // which is: *mut exists<T: sizeof(T) == sizeof(usize)> (T, Vtable<T: Trait>)
+                //
+                // args = [ * ]
+                //          |
+                //          v
+                //      ( Data, Vtable )
+                //                |
+                //                v
+                //              /-------\
+                //              | ...   |
+                //              \-------/
+                //
+                //
+                // WE CAN CONVERT THIS INTO THE ABOVE LOGIC BY DOING
+                //
+                // data = &(*args[0]).0    // gives a pointer to Data above (really the same pointer)
+                // vtable = (*args[0]).1   // loads the vtable out
+                // (data, vtable)          // an equivalent Rust `*mut dyn Trait`
+                //
+                // SO THEN WE CAN USE THE ABOVE CODE.
+                let (data, vtable) = drop_place.to_cvalue(fx).dyn_star_force_data_on_stack(fx);
+                let drop_fn = crate::vtable::drop_fn_of_obj(fx, vtable);
+
+                let virtual_drop = Instance {
+                    def: ty::InstanceDef::Virtual(drop_instance.def_id(), 0),
+                    args: drop_instance.args,
+                };
+                let fn_abi =
+                    RevealAllLayoutCx(fx.tcx).fn_abi_of_instance(virtual_drop, ty::List::empty());
+
+                let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi);
+                let sig = fx.bcx.import_signature(sig);
+                fx.bcx.ins().call_indirect(sig, drop_fn, &[data]);
+            }
+            _ => {
+                assert!(!matches!(drop_instance.def, InstanceDef::Virtual(_, _)));
+
+                let fn_abi =
+                    RevealAllLayoutCx(fx.tcx).fn_abi_of_instance(drop_instance, ty::List::empty());
+
+                let arg_value = drop_place.place_ref(
+                    fx,
+                    fx.layout_of(Ty::new_mut_ref(fx.tcx, fx.tcx.lifetimes.re_erased, ty)),
+                );
+                let arg_value = adjust_arg_for_abi(fx, arg_value, &fn_abi.args[0], true);
+
+                let mut call_args: Vec<Value> = arg_value.into_iter().collect::<Vec<_>>();
+
+                if drop_instance.def.requires_caller_location(fx.tcx) {
+                    // Pass the caller location for `#[track_caller]`.
+                    let caller_location = fx.get_caller_location(source_info);
+                    call_args.extend(
+                        adjust_arg_for_abi(fx, caller_location, &fn_abi.args[1], false).into_iter(),
+                    );
+                }
+
+                let func_ref = fx.get_function_ref(drop_instance);
+                fx.bcx.ins().call(func_ref, &call_args);
+            }
+        }
+    }
+}
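For `ty::Dynamic` places, `codegen_drop` loads the drop function out of the vtable and calls it indirectly. That is the same dispatch safe Rust performs when dropping a boxed trait object; a small runnable demonstration:

```rust
trait Greet {
    fn hi(&self) {}
}

struct Noisy(&'static str);

impl Greet for Noisy {}

impl Drop for Noisy {
    fn drop(&mut self) {
        println!("dropping {}", self.0);
    }
}

fn main() {
    let obj: Box<dyn Greet> = Box::new(Noisy("boxed"));
    // Dropping the box dispatches to Noisy::drop through the vtable's drop
    // entry — the indirect call codegen_drop emits for dyn Trait places.
    drop(obj); // prints "dropping boxed"
}
```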
diff --git a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
new file mode 100644
index 00000000000..06522670029
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
@@ -0,0 +1,303 @@
+//! Argument passing
+
+use cranelift_codegen::ir::{ArgumentExtension, ArgumentPurpose};
+use rustc_target::abi::call::{
+    ArgAbi, ArgAttributes, ArgExtension as RustcArgExtension, CastTarget, PassMode, Reg, RegKind,
+};
+use smallvec::{smallvec, SmallVec};
+
+use crate::prelude::*;
+use crate::value_and_place::assert_assignable;
+
+pub(super) trait ArgAbiExt<'tcx> {
+    fn get_abi_param(&self, tcx: TyCtxt<'tcx>) -> SmallVec<[AbiParam; 2]>;
+    fn get_abi_return(&self, tcx: TyCtxt<'tcx>) -> (Option<AbiParam>, Vec<AbiParam>);
+}
+
+fn reg_to_abi_param(reg: Reg) -> AbiParam {
+    let clif_ty = match (reg.kind, reg.size.bytes()) {
+        (RegKind::Integer, 1) => types::I8,
+        (RegKind::Integer, 2) => types::I16,
+        (RegKind::Integer, 3..=4) => types::I32,
+        (RegKind::Integer, 5..=8) => types::I64,
+        (RegKind::Integer, 9..=16) => types::I128,
+        (RegKind::Float, 4) => types::F32,
+        (RegKind::Float, 8) => types::F64,
+        (RegKind::Vector, size) => types::I8.by(u32::try_from(size).unwrap()).unwrap(),
+        _ => unreachable!("{:?}", reg),
+    };
+    AbiParam::new(clif_ty)
+}
+
+fn apply_arg_attrs_to_abi_param(mut param: AbiParam, arg_attrs: ArgAttributes) -> AbiParam {
+    match arg_attrs.arg_ext {
+        RustcArgExtension::None => {}
+        RustcArgExtension::Zext => param.extension = ArgumentExtension::Uext,
+        RustcArgExtension::Sext => param.extension = ArgumentExtension::Sext,
+    }
+    param
+}
+
+fn cast_target_to_abi_params(cast: &CastTarget) -> SmallVec<[AbiParam; 2]> {
+    let (rest_count, rem_bytes) = if cast.rest.unit.size.bytes() == 0 {
+        (0, 0)
+    } else {
+        (
+            cast.rest.total.bytes() / cast.rest.unit.size.bytes(),
+            cast.rest.total.bytes() % cast.rest.unit.size.bytes(),
+        )
+    };
+
+    // Note: Unlike the LLVM equivalent of this code, we don't need separate branches for when
+    // there is no prefix, since a single unit, an array and a heterogeneous struct are not
+    // represented using different types in Cranelift IR. Instead a single array of primitive
+    // types is used.
+
+    // Create list of fields in the main structure
+    let mut args = cast
+        .prefix
+        .iter()
+        .flatten()
+        .map(|&reg| reg_to_abi_param(reg))
+        .chain((0..rest_count).map(|_| reg_to_abi_param(cast.rest.unit)))
+        .collect::<SmallVec<_>>();
+
+    // Append final integer
+    if rem_bytes != 0 {
+        // Only integers can be really split further.
+        assert_eq!(cast.rest.unit.kind, RegKind::Integer);
+        args.push(reg_to_abi_param(Reg {
+            kind: RegKind::Integer,
+            size: Size::from_bytes(rem_bytes),
+        }));
+    }
+
+    args
+}
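`cast_target_to_abi_params` reduces to integer arithmetic: the `rest` area is covered by as many unit-sized registers as fit, plus one smaller integer for the remainder. The arithmetic in isolation (a hypothetical miniature, sizes in bytes):

```rust
// How a `total`-byte rest area is split into `unit`-byte pieces plus a
// remainder integer, mirroring the rest_count / rem_bytes computation above.
fn split_into_units(total: u64, unit: u64) -> Vec<u64> {
    let mut parts = vec![unit; (total / unit) as usize];
    let rem = total % unit;
    if rem != 0 {
        // Only integers can really be split further; see the assert above.
        parts.push(rem);
    }
    parts
}

fn main() {
    // A 12-byte aggregate with 8-byte units becomes an i64 plus an i32.
    assert_eq!(split_into_units(12, 8), [8, 4]);
    println!("{:?}", split_into_units(12, 8));
}
```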
+
+impl<'tcx> ArgAbiExt<'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
+    fn get_abi_param(&self, tcx: TyCtxt<'tcx>) -> SmallVec<[AbiParam; 2]> {
+        match self.mode {
+            PassMode::Ignore => smallvec![],
+            PassMode::Direct(attrs) => match self.layout.abi {
+                Abi::Scalar(scalar) => smallvec![apply_arg_attrs_to_abi_param(
+                    AbiParam::new(scalar_to_clif_type(tcx, scalar)),
+                    attrs
+                )],
+                Abi::Vector { .. } => {
+                    let vector_ty = crate::intrinsics::clif_vector_type(tcx, self.layout);
+                    smallvec![AbiParam::new(vector_ty)]
+                }
+                _ => unreachable!("{:?}", self.layout.abi),
+            },
+            PassMode::Pair(attrs_a, attrs_b) => match self.layout.abi {
+                Abi::ScalarPair(a, b) => {
+                    let a = scalar_to_clif_type(tcx, a);
+                    let b = scalar_to_clif_type(tcx, b);
+                    smallvec![
+                        apply_arg_attrs_to_abi_param(AbiParam::new(a), attrs_a),
+                        apply_arg_attrs_to_abi_param(AbiParam::new(b), attrs_b),
+                    ]
+                }
+                _ => unreachable!("{:?}", self.layout.abi),
+            },
+            PassMode::Cast { ref cast, pad_i32 } => {
+                assert!(!pad_i32, "padding support not yet implemented");
+                cast_target_to_abi_params(cast)
+            }
+            PassMode::Indirect { attrs, meta_attrs: None, on_stack } => {
+                if on_stack {
+                    // Abi requires aligning struct size to pointer size
+                    let size = self.layout.size.align_to(tcx.data_layout.pointer_align.abi);
+                    let size = u32::try_from(size.bytes()).unwrap();
+                    smallvec![apply_arg_attrs_to_abi_param(
+                        AbiParam::special(pointer_ty(tcx), ArgumentPurpose::StructArgument(size),),
+                        attrs
+                    )]
+                } else {
+                    smallvec![apply_arg_attrs_to_abi_param(AbiParam::new(pointer_ty(tcx)), attrs)]
+                }
+            }
+            PassMode::Indirect { attrs, meta_attrs: Some(meta_attrs), on_stack } => {
+                assert!(!on_stack);
+                smallvec![
+                    apply_arg_attrs_to_abi_param(AbiParam::new(pointer_ty(tcx)), attrs),
+                    apply_arg_attrs_to_abi_param(AbiParam::new(pointer_ty(tcx)), meta_attrs),
+                ]
+            }
+        }
+    }
+
+    fn get_abi_return(&self, tcx: TyCtxt<'tcx>) -> (Option<AbiParam>, Vec<AbiParam>) {
+        match self.mode {
+            PassMode::Ignore => (None, vec![]),
+            PassMode::Direct(_) => match self.layout.abi {
+                Abi::Scalar(scalar) => {
+                    (None, vec![AbiParam::new(scalar_to_clif_type(tcx, scalar))])
+                }
+                Abi::Vector { .. } => {
+                    let vector_ty = crate::intrinsics::clif_vector_type(tcx, self.layout);
+                    (None, vec![AbiParam::new(vector_ty)])
+                }
+                _ => unreachable!("{:?}", self.layout.abi),
+            },
+            PassMode::Pair(_, _) => match self.layout.abi {
+                Abi::ScalarPair(a, b) => {
+                    let a = scalar_to_clif_type(tcx, a);
+                    let b = scalar_to_clif_type(tcx, b);
+                    (None, vec![AbiParam::new(a), AbiParam::new(b)])
+                }
+                _ => unreachable!("{:?}", self.layout.abi),
+            },
+            PassMode::Cast { ref cast, .. } => {
+                (None, cast_target_to_abi_params(cast).into_iter().collect())
+            }
+            PassMode::Indirect { attrs: _, meta_attrs: None, on_stack } => {
+                assert!(!on_stack);
+                (Some(AbiParam::special(pointer_ty(tcx), ArgumentPurpose::StructReturn)), vec![])
+            }
+            PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
+                unreachable!("unsized return value")
+            }
+        }
+    }
+}
+
+pub(super) fn to_casted_value<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    arg: CValue<'tcx>,
+    cast: &CastTarget,
+) -> SmallVec<[Value; 2]> {
+    let (ptr, meta) = arg.force_stack(fx);
+    assert!(meta.is_none());
+    let mut offset = 0;
+    cast_target_to_abi_params(cast)
+        .into_iter()
+        .map(|param| {
+            let val = ptr.offset_i64(fx, offset).load(fx, param.value_type, MemFlags::new());
+            offset += i64::from(param.value_type.bytes());
+            val
+        })
+        .collect()
+}
+
+pub(super) fn from_casted_value<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    block_params: &[Value],
+    layout: TyAndLayout<'tcx>,
+    cast: &CastTarget,
+) -> CValue<'tcx> {
+    let abi_params = cast_target_to_abi_params(cast);
+    let abi_param_size: u32 = abi_params.iter().map(|param| param.value_type.bytes()).sum();
+    let layout_size = u32::try_from(layout.size.bytes()).unwrap();
+    let ptr = fx.create_stack_slot(
+        // The ABI param size may be bigger than the layout size, for example for `[u8; 3]`,
+        // which is packed into an `i32`. It may also be smaller, for example when the type is a
+        // wrapper around an integer with a larger alignment than the integer.
+        std::cmp::max(abi_param_size, layout_size),
+        u32::try_from(layout.align.pref.bytes()).unwrap(),
+    );
+    let mut offset = 0;
+    let mut block_params_iter = block_params.iter().copied();
+    for param in abi_params {
+        let val = ptr.offset_i64(fx, offset).store(
+            fx,
+            block_params_iter.next().unwrap(),
+            MemFlags::new(),
+        );
+        offset += i64::from(param.value_type.bytes());
+        val
+    }
+    assert_eq!(block_params_iter.next(), None, "Leftover block param");
+    CValue::by_ref(ptr, layout)
+}
+
+/// Get a set of values to be passed as function arguments.
+pub(super) fn adjust_arg_for_abi<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    arg: CValue<'tcx>,
+    arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
+    is_owned: bool,
+) -> SmallVec<[Value; 2]> {
+    assert_assignable(fx, arg.layout().ty, arg_abi.layout.ty, 16);
+    match arg_abi.mode {
+        PassMode::Ignore => smallvec![],
+        PassMode::Direct(_) => smallvec![arg.load_scalar(fx)],
+        PassMode::Pair(_, _) => {
+            let (a, b) = arg.load_scalar_pair(fx);
+            smallvec![a, b]
+        }
+        PassMode::Cast { ref cast, .. } => to_casted_value(fx, arg, cast),
+        PassMode::Indirect { .. } => {
+            if is_owned {
+                match arg.force_stack(fx) {
+                    (ptr, None) => smallvec![ptr.get_addr(fx)],
+                    (ptr, Some(meta)) => smallvec![ptr.get_addr(fx), meta],
+                }
+            } else {
+                // Ownership of the value at the backing storage for an argument is passed to the
+                // callee per the ABI, so we must make a copy of the argument unless the argument
+                // local is moved.
+                let place = CPlace::new_stack_slot(fx, arg.layout());
+                place.write_cvalue(fx, arg);
+                smallvec![place.to_ptr().get_addr(fx)]
+            }
+        }
+    }
+}
+
+/// Create a [`CValue`] containing the value of a function parameter, adding clif function
+/// parameters as necessary.
+pub(super) fn cvalue_for_param<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    local: Option<mir::Local>,
+    local_field: Option<usize>,
+    arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
+    block_params_iter: &mut impl Iterator<Item = Value>,
+) -> Option<CValue<'tcx>> {
+    let block_params = arg_abi
+        .get_abi_param(fx.tcx)
+        .into_iter()
+        .map(|abi_param| {
+            let block_param = block_params_iter.next().unwrap();
+            assert_eq!(fx.bcx.func.dfg.value_type(block_param), abi_param.value_type);
+            block_param
+        })
+        .collect::<SmallVec<[_; 2]>>();
+
+    crate::abi::comments::add_arg_comment(
+        fx,
+        "arg",
+        local,
+        local_field,
+        &block_params,
+        &arg_abi.mode,
+        arg_abi.layout,
+    );
+
+    match arg_abi.mode {
+        PassMode::Ignore => None,
+        PassMode::Direct(_) => {
+            assert_eq!(block_params.len(), 1, "{:?}", block_params);
+            Some(CValue::by_val(block_params[0], arg_abi.layout))
+        }
+        PassMode::Pair(_, _) => {
+            assert_eq!(block_params.len(), 2, "{:?}", block_params);
+            Some(CValue::by_val_pair(block_params[0], block_params[1], arg_abi.layout))
+        }
+        PassMode::Cast { ref cast, .. } => {
+            Some(from_casted_value(fx, &block_params, arg_abi.layout, cast))
+        }
+        PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: _ } => {
+            assert_eq!(block_params.len(), 1, "{:?}", block_params);
+            Some(CValue::by_ref(Pointer::new(block_params[0]), arg_abi.layout))
+        }
+        PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
+            assert_eq!(block_params.len(), 2, "{:?}", block_params);
+            Some(CValue::by_ref_unsized(
+                Pointer::new(block_params[0]),
+                block_params[1],
+                arg_abi.layout,
+            ))
+        }
+    }
+}
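The `PassMode::Pair` arms above are what wide pointers hit: a `&[u8]`, for instance, has a `ScalarPair` layout and travels as two ABI values. Viewed from surface Rust (a sketch of the correspondence, not backend code):

```rust
// A slice reference is a ScalarPair (data pointer + length), so it maps to two
// block parameters in cvalue_for_param and two values in adjust_arg_for_abi.
fn sum(bytes: &[u8]) -> u32 {
    // roughly lowered as fn(ptr: *const u8, len: usize) -> u32
    bytes.iter().map(|&b| u32::from(b)).sum()
}

fn main() {
    println!("{}", sum(&[1, 2, 3])); // prints 6
}
```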
diff --git a/compiler/rustc_codegen_cranelift/src/abi/returning.rs b/compiler/rustc_codegen_cranelift/src/abi/returning.rs
new file mode 100644
index 00000000000..e0f399e616e
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/abi/returning.rs
@@ -0,0 +1,130 @@
+//! Return value handling
+
+use rustc_target::abi::call::{ArgAbi, PassMode};
+use smallvec::{smallvec, SmallVec};
+
+use crate::prelude::*;
+
+/// Return a place where the return value of the current function can be written to. If necessary
+/// this adds an extra parameter pointing to where the return value needs to be stored.
+pub(super) fn codegen_return_param<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    ssa_analyzed: &rustc_index::IndexSlice<Local, crate::analyze::SsaKind>,
+    block_params_iter: &mut impl Iterator<Item = Value>,
+) -> CPlace<'tcx> {
+    let (ret_place, ret_param): (_, SmallVec<[_; 2]>) = match fx.fn_abi.ret.mode {
+        PassMode::Ignore | PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast { .. } => {
+            let is_ssa = ssa_analyzed[RETURN_PLACE].is_ssa(fx, fx.fn_abi.ret.layout.ty);
+            (super::make_local_place(fx, RETURN_PLACE, fx.fn_abi.ret.layout, is_ssa), smallvec![])
+        }
+        PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: _ } => {
+            let ret_param = block_params_iter.next().unwrap();
+            assert_eq!(fx.bcx.func.dfg.value_type(ret_param), fx.pointer_type);
+            (CPlace::for_ptr(Pointer::new(ret_param), fx.fn_abi.ret.layout), smallvec![ret_param])
+        }
+        PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
+            unreachable!("unsized return value")
+        }
+    };
+
+    crate::abi::comments::add_arg_comment(
+        fx,
+        "ret",
+        Some(RETURN_PLACE),
+        None,
+        &ret_param,
+        &fx.fn_abi.ret.mode,
+        fx.fn_abi.ret.layout,
+    );
+
+    ret_place
+}
+
+/// Invokes the closure, passing a value representing the return pointer if necessary. When the
+/// closure returns, the return value(s) of the call, if any, are written to the correct place.
+pub(super) fn codegen_with_call_return_arg<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    ret_arg_abi: &ArgAbi<'tcx, Ty<'tcx>>,
+    ret_place: CPlace<'tcx>,
+    f: impl FnOnce(&mut FunctionCx<'_, '_, 'tcx>, Option<Value>) -> Inst,
+) {
+    let (ret_temp_place, return_ptr) = match ret_arg_abi.mode {
+        PassMode::Ignore => (None, None),
+        PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: _ } => {
+            if let Some(ret_ptr) = ret_place.try_to_ptr() {
+                // This is an optimization to prevent unnecessary copies of the return value when
+                // the return place is already a memory place as opposed to a register.
+                // This match arm is only an optimization; removing it would not affect correctness.
+                (None, Some(ret_ptr.get_addr(fx)))
+            } else {
+                let place = CPlace::new_stack_slot(fx, ret_arg_abi.layout);
+                (Some(place), Some(place.to_ptr().get_addr(fx)))
+            }
+        }
+        PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
+            unreachable!("unsized return value")
+        }
+        PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast { .. } => (None, None),
+    };
+
+    let call_inst = f(fx, return_ptr);
+
+    match ret_arg_abi.mode {
+        PassMode::Ignore => {}
+        PassMode::Direct(_) => {
+            let ret_val = fx.bcx.inst_results(call_inst)[0];
+            ret_place.write_cvalue(fx, CValue::by_val(ret_val, ret_arg_abi.layout));
+        }
+        PassMode::Pair(_, _) => {
+            let ret_val_a = fx.bcx.inst_results(call_inst)[0];
+            let ret_val_b = fx.bcx.inst_results(call_inst)[1];
+            ret_place
+                .write_cvalue(fx, CValue::by_val_pair(ret_val_a, ret_val_b, ret_arg_abi.layout));
+        }
+        PassMode::Cast { ref cast, .. } => {
+            let results =
+                fx.bcx.inst_results(call_inst).iter().copied().collect::<SmallVec<[Value; 2]>>();
+            let result =
+                super::pass_mode::from_casted_value(fx, &results, ret_place.layout(), cast);
+            ret_place.write_cvalue(fx, result);
+        }
+        PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: _ } => {
+            if let Some(ret_temp_place) = ret_temp_place {
+                // If ret_temp_place is None, it is not necessary to copy the return value.
+                let ret_temp_value = ret_temp_place.to_cvalue(fx);
+                ret_place.write_cvalue(fx, ret_temp_value);
+            }
+        }
+        PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
+            unreachable!("unsized return value")
+        }
+    }
+}
+
+/// Codegen a return instruction with the right return value(s) if any.
+pub(crate) fn codegen_return(fx: &mut FunctionCx<'_, '_, '_>) {
+    match fx.fn_abi.ret.mode {
+        PassMode::Ignore | PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: _ } => {
+            fx.bcx.ins().return_(&[]);
+        }
+        PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
+            unreachable!("unsized return value")
+        }
+        PassMode::Direct(_) => {
+            let place = fx.get_local_place(RETURN_PLACE);
+            let ret_val = place.to_cvalue(fx).load_scalar(fx);
+            fx.bcx.ins().return_(&[ret_val]);
+        }
+        PassMode::Pair(_, _) => {
+            let place = fx.get_local_place(RETURN_PLACE);
+            let (ret_val_a, ret_val_b) = place.to_cvalue(fx).load_scalar_pair(fx);
+            fx.bcx.ins().return_(&[ret_val_a, ret_val_b]);
+        }
+        PassMode::Cast { ref cast, .. } => {
+            let place = fx.get_local_place(RETURN_PLACE);
+            let ret_val = place.to_cvalue(fx);
+            let ret_vals = super::pass_mode::to_casted_value(fx, ret_val, cast);
+            fx.bcx.ins().return_(&ret_vals);
+        }
+    }
+}
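`PassMode::Indirect` returns mean the caller allocates the destination and passes its address as an extra leading parameter; `codegen_return_param` consumes that parameter and `codegen_return` then returns nothing. The shape is the familiar out-parameter pattern (a source-level analogue, not backend code):

```rust
// Rough analogue of an sret (struct return) call: the caller owns the
// destination and hands the callee a pointer to fill in.
struct Big([u64; 16]);

fn fill(out: &mut Big) {
    out.0 = [7; 16];
}

fn main() {
    let mut b = Big([0; 16]); // caller-allocated return place
    fill(&mut b);
    println!("{}", b.0[0]);
}
```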
diff --git a/compiler/rustc_codegen_cranelift/src/allocator.rs b/compiler/rustc_codegen_cranelift/src/allocator.rs
new file mode 100644
index 00000000000..e8af3e8c255
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/allocator.rs
@@ -0,0 +1,104 @@
+//! Allocator shim
+// Adapted from rustc
+
+use rustc_ast::expand::allocator::{
+    alloc_error_handler_name, default_fn_name, global_fn_name, AllocatorKind, AllocatorTy,
+    ALLOCATOR_METHODS, NO_ALLOC_SHIM_IS_UNSTABLE,
+};
+use rustc_codegen_ssa::base::allocator_kind_for_codegen;
+use rustc_session::config::OomStrategy;
+
+use crate::prelude::*;
+
+/// Returns whether an allocator shim was created
+pub(crate) fn codegen(
+    tcx: TyCtxt<'_>,
+    module: &mut impl Module,
+    unwind_context: &mut UnwindContext,
+) -> bool {
+    let Some(kind) = allocator_kind_for_codegen(tcx) else { return false };
+    codegen_inner(
+        module,
+        unwind_context,
+        kind,
+        tcx.alloc_error_handler_kind(()).unwrap(),
+        tcx.sess.opts.unstable_opts.oom,
+    );
+    true
+}
+
+fn codegen_inner(
+    module: &mut impl Module,
+    unwind_context: &mut UnwindContext,
+    kind: AllocatorKind,
+    alloc_error_handler_kind: AllocatorKind,
+    oom_strategy: OomStrategy,
+) {
+    let usize_ty = module.target_config().pointer_type();
+
+    if kind == AllocatorKind::Default {
+        for method in ALLOCATOR_METHODS {
+            let mut arg_tys = Vec::with_capacity(method.inputs.len());
+            for input in method.inputs.iter() {
+                match input.ty {
+                    AllocatorTy::Layout => {
+                        arg_tys.push(usize_ty); // size
+                        arg_tys.push(usize_ty); // align
+                    }
+                    AllocatorTy::Ptr => arg_tys.push(usize_ty),
+                    AllocatorTy::Usize => arg_tys.push(usize_ty),
+
+                    AllocatorTy::ResultPtr | AllocatorTy::Unit => panic!("invalid allocator arg"),
+                }
+            }
+            let output = match method.output {
+                AllocatorTy::ResultPtr => Some(usize_ty),
+                AllocatorTy::Unit => None,
+
+                AllocatorTy::Layout | AllocatorTy::Usize | AllocatorTy::Ptr => {
+                    panic!("invalid allocator output")
+                }
+            };
+
+            let sig = Signature {
+                call_conv: module.target_config().default_call_conv,
+                params: arg_tys.iter().cloned().map(AbiParam::new).collect(),
+                returns: output.into_iter().map(AbiParam::new).collect(),
+            };
+            crate::common::create_wrapper_function(
+                module,
+                unwind_context,
+                sig,
+                &global_fn_name(method.name),
+                &default_fn_name(method.name),
+            );
+        }
+    }
+
+    let sig = Signature {
+        call_conv: module.target_config().default_call_conv,
+        params: vec![AbiParam::new(usize_ty), AbiParam::new(usize_ty)],
+        returns: vec![],
+    };
+    crate::common::create_wrapper_function(
+        module,
+        unwind_context,
+        sig,
+        "__rust_alloc_error_handler",
+        &alloc_error_handler_name(alloc_error_handler_kind),
+    );
+
+    let data_id = module.declare_data(OomStrategy::SYMBOL, Linkage::Export, false, false).unwrap();
+    let mut data = DataDescription::new();
+    data.set_align(1);
+    let val = oom_strategy.should_panic();
+    data.define(Box::new([val]));
+    module.define_data(data_id, &data).unwrap();
+
+    let data_id =
+        module.declare_data(NO_ALLOC_SHIM_IS_UNSTABLE, Linkage::Export, false, false).unwrap();
+    let mut data = DataDescription::new();
+    data.set_align(1);
+    data.define(Box::new([0]));
+    module.define_data(data_id, &data).unwrap();
+}
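The `kind == AllocatorKind::Default` branch above only fires when no `#[global_allocator]` is declared anywhere in the crate graph; otherwise the attribute's expansion itself provides the `__rust_alloc` family. For context, a minimal user-defined global allocator those symbols would resolve to (a pass-through sketch):

```rust
use std::alloc::{GlobalAlloc, Layout, System};

// A do-nothing wrapper around the system allocator; declaring it means the
// shim's ALLOCATOR_METHODS wrappers are not generated for this crate graph.
struct PassThrough;

unsafe impl GlobalAlloc for PassThrough {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        System.alloc(layout)
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        System.dealloc(ptr, layout)
    }
}

#[global_allocator]
static GLOBAL: PassThrough = PassThrough;

fn main() {
    let v = vec![1u8, 2, 3]; // allocates through __rust_alloc
    println!("{}", v.len());
}
```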
diff --git a/compiler/rustc_codegen_cranelift/src/analyze.rs b/compiler/rustc_codegen_cranelift/src/analyze.rs
new file mode 100644
index 00000000000..c5762638a6b
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/analyze.rs
@@ -0,0 +1,39 @@
+//! SSA analysis
+
+use rustc_index::IndexVec;
+use rustc_middle::mir::StatementKind::*;
+
+use crate::prelude::*;
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub(crate) enum SsaKind {
+    NotSsa,
+    MaybeSsa,
+}
+
+impl SsaKind {
+    pub(crate) fn is_ssa<'tcx>(self, fx: &FunctionCx<'_, '_, 'tcx>, ty: Ty<'tcx>) -> bool {
+        self == SsaKind::MaybeSsa && (fx.clif_type(ty).is_some() || fx.clif_pair_type(ty).is_some())
+    }
+}
+
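+/// Conservatively determine for each local whether it may be lowered to an SSA value.
+/// Any local whose address is taken (via `Rvalue::Ref` or `Rvalue::AddressOf`) must be
+/// stored in memory instead.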
+pub(crate) fn analyze(fx: &FunctionCx<'_, '_, '_>) -> IndexVec<Local, SsaKind> {
+    let mut flag_map =
+        fx.mir.local_decls.iter().map(|_| SsaKind::MaybeSsa).collect::<IndexVec<Local, SsaKind>>();
+
+    for bb in fx.mir.basic_blocks.iter() {
+        for stmt in bb.statements.iter() {
+            match &stmt.kind {
+                Assign(place_and_rval) => match &place_and_rval.1 {
+                    Rvalue::Ref(_, _, place) | Rvalue::AddressOf(_, place) => {
+                        flag_map[place.local] = SsaKind::NotSsa;
+                    }
+                    _ => {}
+                },
+                _ => {}
+            }
+        }
+    }
+
+    flag_map
+}
diff --git a/compiler/rustc_codegen_cranelift/src/archive.rs b/compiler/rustc_codegen_cranelift/src/archive.rs
new file mode 100644
index 00000000000..414d3db1c51
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/archive.rs
@@ -0,0 +1,25 @@
+use std::path::{Path, PathBuf};
+
+use rustc_codegen_ssa::back::archive::{
+    get_native_object_symbols, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
+};
+use rustc_session::Session;
+
+pub(crate) struct ArArchiveBuilderBuilder;
+
+impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
+    fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box<dyn ArchiveBuilder + 'a> {
+        Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols))
+    }
+
+    fn create_dll_import_lib(
+        &self,
+        _sess: &Session,
+        _lib_name: &str,
+        _dll_imports: &[rustc_session::cstore::DllImport],
+        _tmpdir: &Path,
+        _is_direct_dependency: bool,
+    ) -> PathBuf {
+        unimplemented!("creating dll imports is not yet supported");
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
new file mode 100644
index 00000000000..5846689643f
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -0,0 +1,1073 @@
+//! Codegen of a single function
+
+use cranelift_codegen::ir::UserFuncName;
+use cranelift_codegen::CodegenError;
+use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
+use cranelift_module::ModuleError;
+use rustc_ast::InlineAsmOptions;
+use rustc_index::IndexVec;
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
+use rustc_middle::ty::adjustment::PointerCoercion;
+use rustc_middle::ty::layout::FnAbiOf;
+use rustc_middle::ty::print::with_no_trimmed_paths;
+use rustc_middle::ty::TypeVisitableExt;
+use rustc_monomorphize::is_call_from_compiler_builtins_to_upstream_monomorphization;
+
+use crate::constant::ConstantCx;
+use crate::debuginfo::{FunctionDebugContext, TypeDebugContext};
+use crate::inline_asm::codegen_naked_asm;
+use crate::prelude::*;
+use crate::pretty_clif::CommentWriter;
+
+pub(crate) struct CodegenedFunction {
+    symbol_name: String,
+    func_id: FuncId,
+    func: Function,
+    clif_comments: CommentWriter,
+    func_debug_cx: Option<FunctionDebugContext>,
+}
+
+pub(crate) fn codegen_fn<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    cx: &mut crate::CodegenCx,
+    type_dbg: &mut TypeDebugContext<'tcx>,
+    cached_func: Function,
+    module: &mut dyn Module,
+    instance: Instance<'tcx>,
+) -> Option<CodegenedFunction> {
+    debug_assert!(!instance.args.has_infer());
+
+    let symbol_name = tcx.symbol_name(instance).name.to_string();
+    let _timer = tcx.prof.generic_activity_with_arg("codegen fn", &*symbol_name);
+
+    let mir = tcx.instance_mir(instance.def);
+    let _mir_guard = crate::PrintOnPanic(|| {
+        let mut buf = Vec::new();
+        with_no_trimmed_paths!({
+            rustc_middle::mir::pretty::write_mir_fn(tcx, mir, &mut |_, _| Ok(()), &mut buf)
+                .unwrap();
+        });
+        String::from_utf8_lossy(&buf).into_owned()
+    });
+
+    if tcx.codegen_fn_attrs(instance.def_id()).flags.contains(CodegenFnAttrFlags::NAKED) {
+        assert_eq!(mir.basic_blocks.len(), 1);
+        assert!(mir.basic_blocks[START_BLOCK].statements.is_empty());
+
+        match &mir.basic_blocks[START_BLOCK].terminator().kind {
+            TerminatorKind::InlineAsm {
+                template,
+                operands,
+                options,
+                line_spans: _,
+                targets: _,
+                unwind: _,
+            } => {
+                codegen_naked_asm(
+                    tcx,
+                    cx,
+                    module,
+                    instance,
+                    mir.basic_blocks[START_BLOCK].terminator().source_info.span,
+                    &symbol_name,
+                    template,
+                    operands,
+                    *options,
+                );
+            }
+            _ => unreachable!(),
+        }
+
+        return None;
+    }
+
+    // Declare function
+    let sig = get_function_sig(tcx, module.target_config().default_call_conv, instance);
+    let func_id = module.declare_function(&symbol_name, Linkage::Local, &sig).unwrap();
+
+    // Make the FunctionBuilder
+    let mut func_ctx = FunctionBuilderContext::new();
+    let mut func = cached_func;
+    func.clear();
+    func.name = UserFuncName::user(0, func_id.as_u32());
+    func.signature = sig;
+    func.collect_debug_info();
+
+    let mut bcx = FunctionBuilder::new(&mut func, &mut func_ctx);
+
+    // Predefine blocks
+    let start_block = bcx.create_block();
+    let block_map: IndexVec<BasicBlock, Block> =
+        (0..mir.basic_blocks.len()).map(|_| bcx.create_block()).collect();
+
+    // Make FunctionCx
+    let target_config = module.target_config();
+    let pointer_type = target_config.pointer_type();
+    let clif_comments = crate::pretty_clif::CommentWriter::new(tcx, instance);
+
+    let fn_abi = RevealAllLayoutCx(tcx).fn_abi_of_instance(instance, ty::List::empty());
+
+    let func_debug_cx = if let Some(debug_context) = &mut cx.debug_context {
+        Some(debug_context.define_function(tcx, type_dbg, instance, fn_abi, &symbol_name, mir.span))
+    } else {
+        None
+    };
+
+    let mut fx = FunctionCx {
+        cx,
+        module,
+        tcx,
+        target_config,
+        pointer_type,
+        constants_cx: ConstantCx::new(),
+        func_debug_cx,
+
+        instance,
+        symbol_name,
+        mir,
+        fn_abi,
+
+        bcx,
+        block_map,
+        local_map: IndexVec::with_capacity(mir.local_decls.len()),
+        caller_location: None, // set by `codegen_fn_prelude`
+
+        clif_comments,
+        next_ssa_var: 0,
+    };
+
+    tcx.prof.generic_activity("codegen clif ir").run(|| codegen_fn_body(&mut fx, start_block));
+    fx.bcx.seal_all_blocks();
+    fx.bcx.finalize();
+
+    // Recover all necessary data from fx, as accessing func afterwards will prevent any further use of fx.
+    let symbol_name = fx.symbol_name;
+    let clif_comments = fx.clif_comments;
+    let func_debug_cx = fx.func_debug_cx;
+
+    fx.constants_cx.finalize(fx.tcx, &mut *fx.module);
+
+    if cx.should_write_ir {
+        crate::pretty_clif::write_clif_file(
+            tcx.output_filenames(()),
+            &symbol_name,
+            "unopt",
+            module.isa(),
+            &func,
+            &clif_comments,
+        );
+    }
+
+    // Verify function
+    verify_func(tcx, &clif_comments, &func);
+
+    Some(CodegenedFunction { symbol_name, func_id, func, clif_comments, func_debug_cx })
+}
+
+pub(crate) fn compile_fn(
+    cx: &mut crate::CodegenCx,
+    cached_context: &mut Context,
+    module: &mut dyn Module,
+    codegened_func: CodegenedFunction,
+) {
+    let _timer =
+        cx.profiler.generic_activity_with_arg("compile function", &*codegened_func.symbol_name);
+
+    let clif_comments = codegened_func.clif_comments;
+
+    // Store function in context
+    let context = cached_context;
+    context.clear();
+    context.func = codegened_func.func;
+
+    #[cfg(any())] // This is never true
+    let _clif_guard = {
+        use std::fmt::Write;
+
+        let func_clone = context.func.clone();
+        let clif_comments_clone = clif_comments.clone();
+        let mut clif = String::new();
+        for flag in module.isa().flags().iter() {
+            writeln!(clif, "set {}", flag).unwrap();
+        }
+        write!(clif, "target {}", module.isa().triple().architecture.to_string()).unwrap();
+        for isa_flag in module.isa().isa_flags().iter() {
+            write!(clif, " {}", isa_flag).unwrap();
+        }
+        writeln!(clif, "\n").unwrap();
+        writeln!(clif, "; symbol {}", codegened_func.symbol_name).unwrap();
+        crate::PrintOnPanic(move || {
+            let mut clif = clif.clone();
+            ::cranelift_codegen::write::decorate_function(
+                &mut &clif_comments_clone,
+                &mut clif,
+                &func_clone,
+            )
+            .unwrap();
+            clif
+        })
+    };
+
+    // Define function
+    cx.profiler.generic_activity("define function").run(|| {
+        context.want_disasm = cx.should_write_ir;
+        match module.define_function(codegened_func.func_id, context) {
+            Ok(()) => {}
+            Err(ModuleError::Compilation(CodegenError::ImplLimitExceeded)) => {
+                let early_dcx = rustc_session::EarlyDiagCtxt::new(
+                    rustc_session::config::ErrorOutputType::default(),
+                );
+                early_dcx.early_fatal(format!(
+                    "backend implementation limit exceeded while compiling {name}",
+                    name = codegened_func.symbol_name
+                ));
+            }
+            Err(err) => {
+                panic!("Error while defining {name}: {err:?}", name = codegened_func.symbol_name);
+            }
+        }
+    });
+
+    if cx.should_write_ir {
+        // Write optimized function to file for debugging
+        crate::pretty_clif::write_clif_file(
+            &cx.output_filenames,
+            &codegened_func.symbol_name,
+            "opt",
+            module.isa(),
+            &context.func,
+            &clif_comments,
+        );
+
+        if let Some(disasm) = &context.compiled_code().unwrap().vcode {
+            crate::pretty_clif::write_ir_file(
+                &cx.output_filenames,
+                &format!("{}.vcode", codegened_func.symbol_name),
+                |file| file.write_all(disasm.as_bytes()),
+            )
+        }
+    }
+
+    // Define debuginfo for function
+    let isa = module.isa();
+    let debug_context = &mut cx.debug_context;
+    let unwind_context = &mut cx.unwind_context;
+    cx.profiler.generic_activity("generate debug info").run(|| {
+        if let Some(debug_context) = debug_context {
+            codegened_func.func_debug_cx.unwrap().finalize(
+                debug_context,
+                codegened_func.func_id,
+                context,
+            );
+        }
+        unwind_context.add_function(codegened_func.func_id, &context, isa);
+    });
+}
+
+pub(crate) fn verify_func(
+    tcx: TyCtxt<'_>,
+    writer: &crate::pretty_clif::CommentWriter,
+    func: &Function,
+) {
+    tcx.prof.generic_activity("verify clif ir").run(|| {
+        let flags = cranelift_codegen::settings::Flags::new(cranelift_codegen::settings::builder());
+        match cranelift_codegen::verify_function(&func, &flags) {
+            Ok(_) => {}
+            Err(err) => {
+                tcx.dcx().err(format!("{:?}", err));
+                let pretty_error = cranelift_codegen::print_errors::pretty_verifier_error(
+                    &func,
+                    Some(Box::new(writer)),
+                    err,
+                );
+                tcx.dcx().fatal(format!("cranelift verify error:\n{}", pretty_error));
+            }
+        }
+    });
+}
+
+fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
+    let arg_uninhabited = fx
+        .mir
+        .args_iter()
+        .any(|arg| fx.layout_of(fx.monomorphize(fx.mir.local_decls[arg].ty)).abi.is_uninhabited());
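+    // A function with an uninhabited argument can never be called, so lower the whole
+    // body to a single trap.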
+    if arg_uninhabited {
+        fx.bcx.append_block_params_for_function_params(fx.block_map[START_BLOCK]);
+        fx.bcx.switch_to_block(fx.block_map[START_BLOCK]);
+        fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+        return;
+    }
+    fx.tcx
+        .prof
+        .generic_activity("codegen prelude")
+        .run(|| crate::abi::codegen_fn_prelude(fx, start_block));
+
+    let reachable_blocks = traversal::mono_reachable_as_bitset(fx.mir, fx.tcx, fx.instance);
+
+    for (bb, bb_data) in fx.mir.basic_blocks.iter_enumerated() {
+        let block = fx.get_block(bb);
+        fx.bcx.switch_to_block(block);
+
+        if !reachable_blocks.contains(bb) {
+            // We want to skip this block, because it's not reachable. But we still create
+            // the block so terminators in other blocks can reference it.
+            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+            continue;
+        }
+
+        if bb_data.is_cleanup {
+            // Unwinding after panicking is not supported
+            continue;
+
+            // FIXME Once unwinding is supported and Cranelift supports marking blocks as cold, do
+            // so for cleanup blocks.
+        }
+
+        fx.bcx.ins().nop();
+        for stmt in &bb_data.statements {
+            fx.set_debug_loc(stmt.source_info);
+            codegen_stmt(fx, block, stmt);
+        }
+
+        if fx.clif_comments.enabled() {
+            let mut terminator_head = "\n".to_string();
+            with_no_trimmed_paths!({
+                bb_data.terminator().kind.fmt_head(&mut terminator_head).unwrap();
+            });
+            let inst = fx.bcx.func.layout.last_inst(block).unwrap();
+            fx.add_comment(inst, terminator_head);
+        }
+
+        let source_info = bb_data.terminator().source_info;
+        fx.set_debug_loc(source_info);
+
+        let _print_guard =
+            crate::PrintOnPanic(|| format!("terminator {:?}", bb_data.terminator().kind));
+
+        match &bb_data.terminator().kind {
+            TerminatorKind::Goto { target } => {
+                if let TerminatorKind::Return = fx.mir[*target].terminator().kind {
+                    let mut can_immediately_return = true;
+                    for stmt in &fx.mir[*target].statements {
+                        if let StatementKind::StorageDead(_) = stmt.kind {
+                        } else {
+                            // FIXME Can sometimes happen, see rust-lang/rust#70531
+                            can_immediately_return = false;
+                            break;
+                        }
+                    }
+
+                    if can_immediately_return {
+                        crate::abi::codegen_return(fx);
+                        continue;
+                    }
+                }
+
+                let block = fx.get_block(*target);
+                fx.bcx.ins().jump(block, &[]);
+            }
+            TerminatorKind::Return => {
+                crate::abi::codegen_return(fx);
+            }
+            TerminatorKind::Assert { cond, expected, msg, target, unwind: _ } => {
+                if !fx.tcx.sess.overflow_checks() && msg.is_optional_overflow_check() {
+                    let target = fx.get_block(*target);
+                    fx.bcx.ins().jump(target, &[]);
+                    continue;
+                }
+                let cond = codegen_operand(fx, cond).load_scalar(fx);
+
+                let target = fx.get_block(*target);
+                let failure = fx.bcx.create_block();
+                fx.bcx.set_cold_block(failure);
+
+                if *expected {
+                    fx.bcx.ins().brif(cond, target, &[], failure, &[]);
+                } else {
+                    fx.bcx.ins().brif(cond, failure, &[], target, &[]);
+                };
+
+                fx.bcx.switch_to_block(failure);
+                fx.bcx.ins().nop();
+
+                match &**msg {
+                    AssertKind::BoundsCheck { ref len, ref index } => {
+                        let len = codegen_operand(fx, len).load_scalar(fx);
+                        let index = codegen_operand(fx, index).load_scalar(fx);
+                        let location = fx.get_caller_location(source_info).load_scalar(fx);
+
+                        codegen_panic_inner(
+                            fx,
+                            rustc_hir::LangItem::PanicBoundsCheck,
+                            &[index, len, location],
+                            Some(source_info.span),
+                        );
+                    }
+                    AssertKind::MisalignedPointerDereference { ref required, ref found } => {
+                        let required = codegen_operand(fx, required).load_scalar(fx);
+                        let found = codegen_operand(fx, found).load_scalar(fx);
+                        let location = fx.get_caller_location(source_info).load_scalar(fx);
+
+                        codegen_panic_inner(
+                            fx,
+                            rustc_hir::LangItem::PanicMisalignedPointerDereference,
+                            &[required, found, location],
+                            Some(source_info.span),
+                        );
+                    }
+                    _ => {
+                        let location = fx.get_caller_location(source_info).load_scalar(fx);
+
+                        codegen_panic_inner(
+                            fx,
+                            msg.panic_function(),
+                            &[location],
+                            Some(source_info.span),
+                        );
+                    }
+                }
+            }
+
+            TerminatorKind::SwitchInt { discr, targets } => {
+                let discr = codegen_operand(fx, discr);
+                let switch_ty = discr.layout().ty;
+                let discr = discr.load_scalar(fx);
+
+                let use_bool_opt = switch_ty.kind() == fx.tcx.types.bool.kind()
+                    || (targets.iter().count() == 1 && targets.iter().next().unwrap().0 == 0);
+                if use_bool_opt {
+                    assert_eq!(targets.iter().count(), 1);
+                    let (then_value, then_block) = targets.iter().next().unwrap();
+                    let then_block = fx.get_block(then_block);
+                    let else_block = fx.get_block(targets.otherwise());
+                    let test_zero = match then_value {
+                        0 => true,
+                        1 => false,
+                        _ => unreachable!("{:?}", targets),
+                    };
+
+                    let (discr, is_inverted) =
+                        crate::optimize::peephole::maybe_unwrap_bool_not(&mut fx.bcx, discr);
+                    let test_zero = if is_inverted { !test_zero } else { test_zero };
+                    if let Some(taken) = crate::optimize::peephole::maybe_known_branch_taken(
+                        &fx.bcx, discr, test_zero,
+                    ) {
+                        if taken {
+                            fx.bcx.ins().jump(then_block, &[]);
+                        } else {
+                            fx.bcx.ins().jump(else_block, &[]);
+                        }
+                    } else {
+                        if test_zero {
+                            fx.bcx.ins().brif(discr, else_block, &[], then_block, &[]);
+                        } else {
+                            fx.bcx.ins().brif(discr, then_block, &[], else_block, &[]);
+                        }
+                    }
+                } else {
+                    let mut switch = ::cranelift_frontend::Switch::new();
+                    for (value, block) in targets.iter() {
+                        let block = fx.get_block(block);
+                        switch.set_entry(value, block);
+                    }
+                    let otherwise_block = fx.get_block(targets.otherwise());
+                    switch.emit(&mut fx.bcx, discr, otherwise_block);
+                }
+            }
+            TerminatorKind::Call {
+                func,
+                args,
+                destination,
+                target,
+                fn_span,
+                unwind: _,
+                call_source: _,
+            } => {
+                fx.tcx.prof.generic_activity("codegen call").run(|| {
+                    crate::abi::codegen_terminator_call(
+                        fx,
+                        mir::SourceInfo { span: *fn_span, ..source_info },
+                        func,
+                        args,
+                        *destination,
+                        *target,
+                    )
+                });
+            }
+            TerminatorKind::InlineAsm {
+                template,
+                operands,
+                options,
+                targets,
+                line_spans: _,
+                unwind: _,
+            } => {
+                if options.contains(InlineAsmOptions::MAY_UNWIND) {
+                    fx.tcx.dcx().span_fatal(
+                        source_info.span,
+                        "cranelift doesn't support unwinding from inline assembly.",
+                    );
+                }
+
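+                // Without NORETURN the first target is the fallthrough destination, so
+                // only additional targets are labels; with NORETURN every target is one.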
+                let have_labels = if options.contains(InlineAsmOptions::NORETURN) {
+                    !targets.is_empty()
+                } else {
+                    targets.len() > 1
+                };
+                if have_labels {
+                    fx.tcx.dcx().span_fatal(
+                        source_info.span,
+                        "cranelift doesn't support labels in inline assembly.",
+                    );
+                }
+
+                crate::inline_asm::codegen_inline_asm_terminator(
+                    fx,
+                    source_info.span,
+                    template,
+                    operands,
+                    *options,
+                    targets.get(0).copied(),
+                );
+            }
+            TerminatorKind::UnwindTerminate(reason) => {
+                codegen_unwind_terminate(fx, source_info, *reason);
+            }
+            TerminatorKind::UnwindResume => {
+                // FIXME implement unwinding
+                fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+            }
+            TerminatorKind::Unreachable => {
+                fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+            }
+            TerminatorKind::Yield { .. }
+            | TerminatorKind::FalseEdge { .. }
+            | TerminatorKind::FalseUnwind { .. }
+            | TerminatorKind::CoroutineDrop => {
+                bug!("shouldn't exist at codegen {:?}", bb_data.terminator());
+            }
+            TerminatorKind::Drop { place, target, unwind: _, replace: _ } => {
+                let drop_place = codegen_place(fx, *place);
+                crate::abi::codegen_drop(fx, source_info, drop_place);
+
+                let target_block = fx.get_block(*target);
+                fx.bcx.ins().jump(target_block, &[]);
+            }
+        };
+    }
+}
+
+fn codegen_stmt<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    #[allow(unused_variables)] cur_block: Block,
+    stmt: &Statement<'tcx>,
+) {
+    let _print_guard = crate::PrintOnPanic(|| format!("stmt {:?}", stmt));
+
+    fx.set_debug_loc(stmt.source_info);
+
+    match &stmt.kind {
+        StatementKind::StorageLive(..) | StatementKind::StorageDead(..) => {} // These are not very useful
+        _ => {
+            if fx.clif_comments.enabled() {
+                let inst = fx.bcx.func.layout.last_inst(cur_block).unwrap();
+                with_no_trimmed_paths!({
+                    fx.add_comment(inst, format!("{:?}", stmt));
+                });
+            }
+        }
+    }
+
+    match &stmt.kind {
+        StatementKind::SetDiscriminant { place, variant_index } => {
+            let place = codegen_place(fx, **place);
+            crate::discriminant::codegen_set_discriminant(fx, place, *variant_index);
+        }
+        StatementKind::Assign(to_place_and_rval) => {
+            let lval = codegen_place(fx, to_place_and_rval.0);
+            let dest_layout = lval.layout();
+            match to_place_and_rval.1 {
+                Rvalue::Use(ref operand) => {
+                    let val = codegen_operand(fx, operand);
+                    lval.write_cvalue(fx, val);
+                }
+                Rvalue::CopyForDeref(place) => {
+                    let cplace = codegen_place(fx, place);
+                    let val = cplace.to_cvalue(fx);
+                    lval.write_cvalue(fx, val)
+                }
+                Rvalue::Ref(_, _, place) | Rvalue::AddressOf(_, place) => {
+                    let place = codegen_place(fx, place);
+                    let ref_ = place.place_ref(fx, lval.layout());
+                    lval.write_cvalue(fx, ref_);
+                }
+                Rvalue::ThreadLocalRef(def_id) => {
+                    let val = crate::constant::codegen_tls_ref(fx, def_id, lval.layout());
+                    lval.write_cvalue(fx, val);
+                }
+                Rvalue::BinaryOp(bin_op, ref lhs_rhs) => {
+                    let lhs = codegen_operand(fx, &lhs_rhs.0);
+                    let rhs = codegen_operand(fx, &lhs_rhs.1);
+
+                    let res = crate::num::codegen_binop(fx, bin_op, lhs, rhs);
+                    lval.write_cvalue(fx, res);
+                }
+                Rvalue::CheckedBinaryOp(bin_op, ref lhs_rhs) => {
+                    let lhs = codegen_operand(fx, &lhs_rhs.0);
+                    let rhs = codegen_operand(fx, &lhs_rhs.1);
+
+                    let res = crate::num::codegen_checked_int_binop(fx, bin_op, lhs, rhs);
+                    lval.write_cvalue(fx, res);
+                }
+                Rvalue::UnaryOp(un_op, ref operand) => {
+                    let operand = codegen_operand(fx, operand);
+                    let layout = operand.layout();
+                    let val = operand.load_scalar(fx);
+                    let res = match un_op {
+                        UnOp::Not => match layout.ty.kind() {
+                            ty::Bool => {
+                                let res = fx.bcx.ins().icmp_imm(IntCC::Equal, val, 0);
+                                CValue::by_val(res, layout)
+                            }
+                            ty::Uint(_) | ty::Int(_) => {
+                                CValue::by_val(fx.bcx.ins().bnot(val), layout)
+                            }
+                            _ => unreachable!("un op Not for {:?}", layout.ty),
+                        },
+                        UnOp::Neg => match layout.ty.kind() {
+                            ty::Int(_) => CValue::by_val(fx.bcx.ins().ineg(val), layout),
+                            ty::Float(_) => CValue::by_val(fx.bcx.ins().fneg(val), layout),
+                            _ => unreachable!("un op Neg for {:?}", layout.ty),
+                        },
+                    };
+                    lval.write_cvalue(fx, res);
+                }
+                Rvalue::Cast(
+                    CastKind::PointerCoercion(PointerCoercion::ReifyFnPointer),
+                    ref operand,
+                    to_ty,
+                ) => {
+                    let from_ty = fx.monomorphize(operand.ty(&fx.mir.local_decls, fx.tcx));
+                    let to_layout = fx.layout_of(fx.monomorphize(to_ty));
+                    match *from_ty.kind() {
+                        ty::FnDef(def_id, args) => {
+                            let func_ref = fx.get_function_ref(
+                                Instance::resolve_for_fn_ptr(
+                                    fx.tcx,
+                                    ParamEnv::reveal_all(),
+                                    def_id,
+                                    args,
+                                )
+                                .unwrap()
+                                .polymorphize(fx.tcx),
+                            );
+                            let func_addr = fx.bcx.ins().func_addr(fx.pointer_type, func_ref);
+                            lval.write_cvalue(fx, CValue::by_val(func_addr, to_layout));
+                        }
+                        _ => bug!("Trying to ReifyFnPointer on non FnDef {:?}", from_ty),
+                    }
+                }
+                Rvalue::Cast(
+                    CastKind::PointerCoercion(PointerCoercion::UnsafeFnPointer),
+                    ref operand,
+                    to_ty,
+                )
+                | Rvalue::Cast(
+                    CastKind::PointerCoercion(PointerCoercion::MutToConstPointer),
+                    ref operand,
+                    to_ty,
+                )
+                | Rvalue::Cast(
+                    CastKind::PointerCoercion(PointerCoercion::ArrayToPointer),
+                    ref operand,
+                    to_ty,
+                ) => {
+                    let to_layout = fx.layout_of(fx.monomorphize(to_ty));
+                    let operand = codegen_operand(fx, operand);
+                    lval.write_cvalue(fx, operand.cast_pointer_to(to_layout));
+                }
+                Rvalue::Cast(
+                    CastKind::IntToInt
+                    | CastKind::FloatToFloat
+                    | CastKind::FloatToInt
+                    | CastKind::IntToFloat
+                    | CastKind::FnPtrToPtr
+                    | CastKind::PtrToPtr
+                    | CastKind::PointerExposeProvenance
+                    | CastKind::PointerWithExposedProvenance,
+                    ref operand,
+                    to_ty,
+                ) => {
+                    let operand = codegen_operand(fx, operand);
+                    let from_ty = operand.layout().ty;
+                    let to_ty = fx.monomorphize(to_ty);
+
+                    fn is_fat_ptr<'tcx>(fx: &FunctionCx<'_, '_, 'tcx>, ty: Ty<'tcx>) -> bool {
+                        ty.builtin_deref(true)
+                            .is_some_and(|pointee_ty| has_ptr_meta(fx.tcx, pointee_ty))
+                    }
+
+                    if is_fat_ptr(fx, from_ty) {
+                        if is_fat_ptr(fx, to_ty) {
+                            // fat-ptr -> fat-ptr
+                            lval.write_cvalue(fx, operand.cast_pointer_to(dest_layout));
+                        } else {
+                            // fat-ptr -> thin-ptr
+                            let (ptr, _extra) = operand.load_scalar_pair(fx);
+                            lval.write_cvalue(fx, CValue::by_val(ptr, dest_layout))
+                        }
+                    } else {
+                        let to_clif_ty = fx.clif_type(to_ty).unwrap();
+                        let from = operand.load_scalar(fx);
+
+                        let res = clif_int_or_float_cast(
+                            fx,
+                            from,
+                            type_sign(from_ty),
+                            to_clif_ty,
+                            type_sign(to_ty),
+                        );
+                        lval.write_cvalue(fx, CValue::by_val(res, dest_layout));
+                    }
+                }
+                Rvalue::Cast(
+                    CastKind::PointerCoercion(PointerCoercion::ClosureFnPointer(_)),
+                    ref operand,
+                    _to_ty,
+                ) => {
+                    let operand = codegen_operand(fx, operand);
+                    match *operand.layout().ty.kind() {
+                        ty::Closure(def_id, args) => {
+                            let instance = Instance::resolve_closure(
+                                fx.tcx,
+                                def_id,
+                                args,
+                                ty::ClosureKind::FnOnce,
+                            )
+                            .polymorphize(fx.tcx);
+                            let func_ref = fx.get_function_ref(instance);
+                            let func_addr = fx.bcx.ins().func_addr(fx.pointer_type, func_ref);
+                            lval.write_cvalue(fx, CValue::by_val(func_addr, lval.layout()));
+                        }
+                        _ => bug!("{} cannot be cast to a fn ptr", operand.layout().ty),
+                    }
+                }
+                Rvalue::Cast(
+                    CastKind::PointerCoercion(PointerCoercion::Unsize),
+                    ref operand,
+                    _to_ty,
+                ) => {
+                    let operand = codegen_operand(fx, operand);
+                    crate::unsize::coerce_unsized_into(fx, operand, lval);
+                }
+                Rvalue::Cast(CastKind::DynStar, ref operand, _) => {
+                    let operand = codegen_operand(fx, operand);
+                    crate::unsize::coerce_dyn_star(fx, operand, lval);
+                }
+                Rvalue::Cast(CastKind::Transmute, ref operand, _to_ty) => {
+                    let operand = codegen_operand(fx, operand);
+                    lval.write_cvalue_transmute(fx, operand);
+                }
+                Rvalue::Discriminant(place) => {
+                    let place = codegen_place(fx, place);
+                    let value = place.to_cvalue(fx);
+                    crate::discriminant::codegen_get_discriminant(fx, lval, value, dest_layout);
+                }
+                Rvalue::Repeat(ref operand, times) => {
+                    let operand = codegen_operand(fx, operand);
+                    let times =
+                        fx.monomorphize(times).eval_target_usize(fx.tcx, ParamEnv::reveal_all());
+                    if operand.layout().size.bytes() == 0 {
+                        // Do nothing for ZSTs
+                    } else if fx.clif_type(operand.layout().ty) == Some(types::I8) {
+                        let times = fx.bcx.ins().iconst(fx.pointer_type, times as i64);
+                        // FIXME use emit_small_memset where possible
+                        let addr = lval.to_ptr().get_addr(fx);
+                        let val = operand.load_scalar(fx);
+                        fx.bcx.call_memset(fx.target_config, addr, val, times);
+                    } else {
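+                        // General case: an explicit loop. loop_block checks whether the
+                        // index has reached `times`; loop_block2 writes one element and
+                        // increments the index.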
+                        let loop_block = fx.bcx.create_block();
+                        let loop_block2 = fx.bcx.create_block();
+                        let done_block = fx.bcx.create_block();
+                        let index = fx.bcx.append_block_param(loop_block, fx.pointer_type);
+                        let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
+                        fx.bcx.ins().jump(loop_block, &[zero]);
+
+                        fx.bcx.switch_to_block(loop_block);
+                        let done = fx.bcx.ins().icmp_imm(IntCC::Equal, index, times as i64);
+                        fx.bcx.ins().brif(done, done_block, &[], loop_block2, &[]);
+
+                        fx.bcx.switch_to_block(loop_block2);
+                        let to = lval.place_index(fx, index);
+                        to.write_cvalue(fx, operand);
+                        let index = fx.bcx.ins().iadd_imm(index, 1);
+                        fx.bcx.ins().jump(loop_block, &[index]);
+
+                        fx.bcx.switch_to_block(done_block);
+                        fx.bcx.ins().nop();
+                    }
+                }
+                Rvalue::Len(place) => {
+                    let place = codegen_place(fx, place);
+                    let usize_layout = fx.layout_of(fx.tcx.types.usize);
+                    let len = codegen_array_len(fx, place);
+                    lval.write_cvalue(fx, CValue::by_val(len, usize_layout));
+                }
+                Rvalue::ShallowInitBox(ref operand, content_ty) => {
+                    let content_ty = fx.monomorphize(content_ty);
+                    let box_layout = fx.layout_of(Ty::new_box(fx.tcx, content_ty));
+                    let operand = codegen_operand(fx, operand);
+                    let operand = operand.load_scalar(fx);
+                    lval.write_cvalue(fx, CValue::by_val(operand, box_layout));
+                }
+                Rvalue::NullaryOp(ref null_op, ty) => {
+                    assert!(lval.layout().ty.is_sized(fx.tcx, ParamEnv::reveal_all()));
+                    let layout = fx.layout_of(fx.monomorphize(ty));
+                    let val = match null_op {
+                        NullOp::SizeOf => layout.size.bytes(),
+                        NullOp::AlignOf => layout.align.abi.bytes(),
+                        NullOp::OffsetOf(fields) => {
+                            layout.offset_of_subfield(fx, fields.iter()).bytes()
+                        }
+                        NullOp::UbChecks => {
+                            let val = fx.tcx.sess.ub_checks();
+                            let val = CValue::by_val(
+                                fx.bcx.ins().iconst(types::I8, i64::try_from(val).unwrap()),
+                                fx.layout_of(fx.tcx.types.bool),
+                            );
+                            lval.write_cvalue(fx, val);
+                            return;
+                        }
+                    };
+                    let val = CValue::by_val(
+                        fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(val).unwrap()),
+                        fx.layout_of(fx.tcx.types.usize),
+                    );
+                    lval.write_cvalue(fx, val);
+                }
+                Rvalue::Aggregate(ref kind, ref operands)
+                    if matches!(**kind, AggregateKind::RawPtr(..)) =>
+                {
+                    let ty = to_place_and_rval.1.ty(&fx.mir.local_decls, fx.tcx);
+                    let layout = fx.layout_of(fx.monomorphize(ty));
+                    let [data, meta] = &*operands.raw else {
+                        bug!("RawPtr fields: {operands:?}");
+                    };
+                    let data = codegen_operand(fx, data);
+                    let meta = codegen_operand(fx, meta);
+                    assert!(data.layout().ty.is_unsafe_ptr());
+                    assert!(layout.ty.is_unsafe_ptr());
+                    let ptr_val = if meta.layout().is_zst() {
+                        data.cast_pointer_to(layout)
+                    } else {
+                        CValue::by_val_pair(data.load_scalar(fx), meta.load_scalar(fx), layout)
+                    };
+                    lval.write_cvalue(fx, ptr_val);
+                }
+                Rvalue::Aggregate(ref kind, ref operands) => {
+                    let (variant_index, variant_dest, active_field_index) = match **kind {
+                        mir::AggregateKind::Adt(_, variant_index, _, _, active_field_index) => {
+                            let variant_dest = lval.downcast_variant(fx, variant_index);
+                            (variant_index, variant_dest, active_field_index)
+                        }
+                        _ => (FIRST_VARIANT, lval, None),
+                    };
+                    if active_field_index.is_some() {
+                        assert_eq!(operands.len(), 1);
+                    }
+                    for (i, operand) in operands.iter_enumerated() {
+                        let operand = codegen_operand(fx, operand);
+                        let field_index = active_field_index.unwrap_or(i);
+                        let to = if let mir::AggregateKind::Array(_) = **kind {
+                            let array_index = i64::from(field_index.as_u32());
+                            let index = fx.bcx.ins().iconst(fx.pointer_type, array_index);
+                            variant_dest.place_index(fx, index)
+                        } else {
+                            variant_dest.place_field(fx, field_index)
+                        };
+                        to.write_cvalue(fx, operand);
+                    }
+                    crate::discriminant::codegen_set_discriminant(fx, lval, variant_index);
+                }
+            }
+        }
+        StatementKind::StorageLive(_)
+        | StatementKind::StorageDead(_)
+        | StatementKind::Deinit(_)
+        | StatementKind::ConstEvalCounter
+        | StatementKind::Nop
+        | StatementKind::FakeRead(..)
+        | StatementKind::Retag { .. }
+        | StatementKind::PlaceMention(..)
+        | StatementKind::AscribeUserType(..) => {}
+
+        StatementKind::Coverage { .. } => fx.tcx.dcx().fatal("-Zcoverage is unimplemented"),
+        StatementKind::Intrinsic(ref intrinsic) => match &**intrinsic {
+            // We ignore `assume` intrinsics; they are only useful for optimizations.
+            NonDivergingIntrinsic::Assume(_) => {}
+            NonDivergingIntrinsic::CopyNonOverlapping(mir::CopyNonOverlapping {
+                src,
+                dst,
+                count,
+            }) => {
+                let dst = codegen_operand(fx, dst);
+                let pointee = dst
+                    .layout()
+                    .pointee_info_at(fx, rustc_target::abi::Size::ZERO)
+                    .expect("Expected pointer");
+                let dst = dst.load_scalar(fx);
+                let src = codegen_operand(fx, src).load_scalar(fx);
+                let count = codegen_operand(fx, count).load_scalar(fx);
+                let elem_size: u64 = pointee.size.bytes();
+                let bytes = if elem_size != 1 {
+                    fx.bcx.ins().imul_imm(count, elem_size as i64)
+                } else {
+                    count
+                };
+                fx.bcx.call_memcpy(fx.target_config, dst, src, bytes);
+            }
+        },
+    }
+}
+
+fn codegen_array_len<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, place: CPlace<'tcx>) -> Value {
+    match *place.layout().ty.kind() {
+        ty::Array(_elem_ty, len) => {
+            let len = fx.monomorphize(len).eval_target_usize(fx.tcx, ParamEnv::reveal_all()) as i64;
+            fx.bcx.ins().iconst(fx.pointer_type, len)
+        }
+        ty::Slice(_elem_ty) => place.to_ptr_unsized().1,
+        _ => bug!("Rvalue::Len({:?})", place),
+    }
+}
+
+pub(crate) fn codegen_place<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    place: Place<'tcx>,
+) -> CPlace<'tcx> {
+    let mut cplace = fx.get_local_place(place.local);
+
+    for elem in place.projection {
+        match elem {
+            PlaceElem::Deref => {
+                cplace = cplace.place_deref(fx);
+            }
+            PlaceElem::OpaqueCast(ty) => bug!("encountered OpaqueCast({ty}) in codegen"),
+            PlaceElem::Subtype(ty) => cplace = cplace.place_transmute_type(fx, fx.monomorphize(ty)),
+            PlaceElem::Field(field, _ty) => {
+                cplace = cplace.place_field(fx, field);
+            }
+            PlaceElem::Index(local) => {
+                let index = fx.get_local_place(local).to_cvalue(fx).load_scalar(fx);
+                cplace = cplace.place_index(fx, index);
+            }
+            PlaceElem::ConstantIndex { offset, min_length: _, from_end } => {
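+                // A `from_end` index counts backwards from the length of the array or slice.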
+                let offset: u64 = offset;
+                let index = if !from_end {
+                    fx.bcx.ins().iconst(fx.pointer_type, offset as i64)
+                } else {
+                    let len = codegen_array_len(fx, cplace);
+                    fx.bcx.ins().iadd_imm(len, -(offset as i64))
+                };
+                cplace = cplace.place_index(fx, index);
+            }
+            PlaceElem::Subslice { from, to, from_end } => {
+                // These indices are generated by slice patterns.
+                // slice[from:-to] in Python terms.
+
+                let from: u64 = from;
+                let to: u64 = to;
+
+                match cplace.layout().ty.kind() {
+                    ty::Array(elem_ty, _len) => {
+                        assert!(!from_end, "array subslices are never `from_end`");
+                        let elem_layout = fx.layout_of(*elem_ty);
+                        let ptr = cplace.to_ptr();
+                        cplace = CPlace::for_ptr(
+                            ptr.offset_i64(fx, elem_layout.size.bytes() as i64 * (from as i64)),
+                            fx.layout_of(Ty::new_array(fx.tcx, *elem_ty, to - from)),
+                        );
+                    }
+                    ty::Slice(elem_ty) => {
+                        assert!(from_end, "slice subslices should be `from_end`");
+                        let elem_layout = fx.layout_of(*elem_ty);
+                        let (ptr, len) = cplace.to_ptr_unsized();
+                        cplace = CPlace::for_ptr_with_extra(
+                            ptr.offset_i64(fx, elem_layout.size.bytes() as i64 * (from as i64)),
+                            fx.bcx.ins().iadd_imm(len, -(from as i64 + to as i64)),
+                            cplace.layout(),
+                        );
+                    }
+                    _ => unreachable!(),
+                }
+            }
+            PlaceElem::Downcast(_adt_def, variant) => {
+                cplace = cplace.downcast_variant(fx, variant);
+            }
+        }
+    }
+
+    cplace
+}
+
+pub(crate) fn codegen_operand<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    operand: &Operand<'tcx>,
+) -> CValue<'tcx> {
+    match operand {
+        Operand::Move(place) | Operand::Copy(place) => {
+            let cplace = codegen_place(fx, *place);
+            cplace.to_cvalue(fx)
+        }
+        Operand::Constant(const_) => crate::constant::codegen_constant_operand(fx, const_),
+    }
+}
+
+pub(crate) fn codegen_panic_nounwind<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    msg_str: &str,
+    span: Option<Span>,
+) {
+    let msg_ptr = fx.anonymous_str(msg_str);
+    let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap());
+    let args = [msg_ptr, msg_len];
+
+    codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, span);
+}
+
+pub(crate) fn codegen_unwind_terminate<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    source_info: mir::SourceInfo,
+    reason: UnwindTerminateReason,
+) {
+    let args = [];
+
+    codegen_panic_inner(fx, reason.lang_item(), &args, Some(source_info.span));
+}
+
+fn codegen_panic_inner<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    lang_item: rustc_hir::LangItem,
+    args: &[Value],
+    span: Option<Span>,
+) {
+    let def_id = fx.tcx.require_lang_item(lang_item, span);
+
+    let instance = Instance::mono(fx.tcx, def_id).polymorphize(fx.tcx);
+
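+    // compiler-builtins must not call functions from other crates, so replace panics
+    // that would do so with a plain trap.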
+    if is_call_from_compiler_builtins_to_upstream_monomorphization(fx.tcx, instance) {
+        fx.bcx.ins().trap(TrapCode::User(0));
+        return;
+    }
+
+    let symbol_name = fx.tcx.symbol_name(instance).name;
+
+    fx.lib_call(
+        symbol_name,
+        args.iter().map(|&arg| AbiParam::new(fx.bcx.func.dfg.value_type(arg))).collect(),
+        vec![],
+        args,
+    );
+
+    fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+}
diff --git a/compiler/rustc_codegen_cranelift/src/cast.rs b/compiler/rustc_codegen_cranelift/src/cast.rs
new file mode 100644
index 00000000000..0b5cb1547fc
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/cast.rs
@@ -0,0 +1,168 @@
+//! Various number casting functions
+
+use crate::prelude::*;
+
+pub(crate) fn clif_intcast(
+    fx: &mut FunctionCx<'_, '_, '_>,
+    val: Value,
+    to: Type,
+    signed: bool,
+) -> Value {
+    let from = fx.bcx.func.dfg.value_type(val);
+    match (from, to) {
+        // equal
+        (_, _) if from == to => val,
+
+        // extend
+        (_, _) if to.wider_or_equal(from) => {
+            if signed {
+                fx.bcx.ins().sextend(to, val)
+            } else {
+                fx.bcx.ins().uextend(to, val)
+            }
+        }
+
+        // reduce
+        (_, _) => fx.bcx.ins().ireduce(to, val),
+    }
+}
+
+pub(crate) fn clif_int_or_float_cast(
+    fx: &mut FunctionCx<'_, '_, '_>,
+    from: Value,
+    from_signed: bool,
+    to_ty: Type,
+    to_signed: bool,
+) -> Value {
+    let from_ty = fx.bcx.func.dfg.value_type(from);
+
+    if from_ty.is_int() && to_ty.is_int() {
+        // int-like -> int-like
+        clif_intcast(
+            fx,
+            from,
+            to_ty,
+            // This is correct: either from_signed == to_signed, in which case the sign is
+            // trivially right, or from_clif_ty == to_clif_ty, in which case the cast is a
+            // no-op and the sign does not matter.
+            from_signed,
+        )
+    } else if from_ty.is_int() && to_ty.is_float() {
+        if from_ty == types::I128 {
+            // _______ss__f_
+            // __float  tisf: i128 -> f32
+            // __float  tidf: i128 -> f64
+            // __floatuntisf: u128 -> f32
+            // __floatuntidf: u128 -> f64
+
+            let name = format!(
+                "__float{sign}ti{flt}f",
+                sign = if from_signed { "" } else { "un" },
+                flt = match to_ty {
+                    types::F32 => "s",
+                    types::F64 => "d",
+                    _ => unreachable!("{:?}", to_ty),
+                },
+            );
+
+            return fx.lib_call(
+                &name,
+                vec![AbiParam::new(types::I128)],
+                vec![AbiParam::new(to_ty)],
+                &[from],
+            )[0];
+        }
+
+        // int-like -> float
+        if from_signed {
+            fx.bcx.ins().fcvt_from_sint(to_ty, from)
+        } else {
+            fx.bcx.ins().fcvt_from_uint(to_ty, from)
+        }
+    } else if from_ty.is_float() && to_ty.is_int() {
+        let val = if to_ty == types::I128 {
+            // _____sssf___
+            // __fix   sfti: f32 -> i128
+            // __fix   dfti: f64 -> i128
+            // __fixunssfti: f32 -> u128
+            // __fixunsdfti: f64 -> u128
+
+            let name = format!(
+                "__fix{sign}{flt}fti",
+                sign = if to_signed { "" } else { "uns" },
+                flt = match from_ty {
+                    types::F32 => "s",
+                    types::F64 => "d",
+                    _ => unreachable!("{:?}", from_ty),
+                },
+            );
+
+            if fx.tcx.sess.target.is_like_windows {
+                let ret = fx.lib_call(
+                    &name,
+                    vec![AbiParam::new(from_ty)],
+                    vec![AbiParam::new(types::I64X2)],
+                    &[from],
+                )[0];
+                // FIXME(bytecodealliance/wasmtime#6104) use bitcast instead of store to get from i64x2 to i128
+                let ret_ptr = fx.create_stack_slot(16, 16);
+                ret_ptr.store(fx, ret, MemFlags::trusted());
+                ret_ptr.load(fx, types::I128, MemFlags::trusted())
+            } else {
+                fx.lib_call(
+                    &name,
+                    vec![AbiParam::new(from_ty)],
+                    vec![AbiParam::new(types::I128)],
+                    &[from],
+                )[0]
+            }
+        } else if to_ty == types::I8 || to_ty == types::I16 {
+            // FIXME implement fcvt_to_*int_sat.i8/i16
+            let val = if to_signed {
+                fx.bcx.ins().fcvt_to_sint_sat(types::I32, from)
+            } else {
+                fx.bcx.ins().fcvt_to_uint_sat(types::I32, from)
+            };
+            let (min, max) = match (to_ty, to_signed) {
+                (types::I8, false) => (0, i64::from(u8::MAX)),
+                (types::I16, false) => (0, i64::from(u16::MAX)),
+                (types::I8, true) => (i64::from(i8::MIN as u32), i64::from(i8::MAX as u32)),
+                (types::I16, true) => (i64::from(i16::MIN as u32), i64::from(i16::MAX as u32)),
+                _ => unreachable!(),
+            };
+            let min_val = fx.bcx.ins().iconst(types::I32, min);
+            let max_val = fx.bcx.ins().iconst(types::I32, max);
+
+            let val = if to_signed {
+                let has_underflow = fx.bcx.ins().icmp_imm(IntCC::SignedLessThan, val, min);
+                let has_overflow = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThan, val, max);
+                let bottom_capped = fx.bcx.ins().select(has_underflow, min_val, val);
+                fx.bcx.ins().select(has_overflow, max_val, bottom_capped)
+            } else {
+                let has_overflow = fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThan, val, max);
+                fx.bcx.ins().select(has_overflow, max_val, val)
+            };
+            fx.bcx.ins().ireduce(to_ty, val)
+        } else if to_signed {
+            fx.bcx.ins().fcvt_to_sint_sat(to_ty, from)
+        } else {
+            fx.bcx.ins().fcvt_to_uint_sat(to_ty, from)
+        };
+
+        if let Some(false) = fx.tcx.sess.opts.unstable_opts.saturating_float_casts {
+            return val;
+        }
+
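+        // Rust defines float to int casts to saturate, with NaN mapped to zero;
+        // explicitly select zero for NaN inputs, since not every path above
+        // guarantees this.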
+        let is_not_nan = fx.bcx.ins().fcmp(FloatCC::Equal, from, from);
+        let zero = type_zero_value(&mut fx.bcx, to_ty);
+        fx.bcx.ins().select(is_not_nan, val, zero)
+    } else if from_ty.is_float() && to_ty.is_float() {
+        // float -> float
+        match (from_ty, to_ty) {
+            (types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from),
+            (types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from),
+            _ => from,
+        }
+    } else {
+        unreachable!("cast value from {:?} to {:?}", from_ty, to_ty);
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/codegen_i128.rs b/compiler/rustc_codegen_cranelift/src/codegen_i128.rs
new file mode 100644
index 00000000000..4a5ef352151
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/codegen_i128.rs
@@ -0,0 +1,141 @@
+//! Replaces 128-bit operators with lang item calls where necessary
+
+use cranelift_codegen::ir::ArgumentPurpose;
+
+use crate::prelude::*;
+
+pub(crate) fn maybe_codegen<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    lhs: CValue<'tcx>,
+    rhs: CValue<'tcx>,
+) -> Option<CValue<'tcx>> {
+    if lhs.layout().ty != fx.tcx.types.u128
+        && lhs.layout().ty != fx.tcx.types.i128
+        && rhs.layout().ty != fx.tcx.types.u128
+        && rhs.layout().ty != fx.tcx.types.i128
+    {
+        return None;
+    }
+
+    let is_signed = type_sign(lhs.layout().ty);
+
+    match bin_op {
+        BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => None,
+        BinOp::Add | BinOp::AddUnchecked | BinOp::Sub | BinOp::SubUnchecked => None,
+        BinOp::Mul | BinOp::MulUnchecked => {
+            let args = [lhs.load_scalar(fx), rhs.load_scalar(fx)];
+            let ret_val = fx.lib_call(
+                "__multi3",
+                vec![AbiParam::new(types::I128), AbiParam::new(types::I128)],
+                vec![AbiParam::new(types::I128)],
+                &args,
+            )[0];
+            Some(CValue::by_val(
+                ret_val,
+                fx.layout_of(if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 }),
+            ))
+        }
+        BinOp::Offset => unreachable!("offset should only be used on pointers, not 128-bit ints"),
+        BinOp::Div | BinOp::Rem => {
+            let name = match (bin_op, is_signed) {
+                (BinOp::Div, false) => "__udivti3",
+                (BinOp::Div, true) => "__divti3",
+                (BinOp::Rem, false) => "__umodti3",
+                (BinOp::Rem, true) => "__modti3",
+                _ => unreachable!(),
+            };
+            if fx.tcx.sess.target.is_like_windows {
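+                // On Windows the 128-bit result is returned in a vector register, so it
+                // is declared as i64x2 here and spilled through the stack to reinterpret
+                // it as an i128.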
+                let args = [lhs.load_scalar(fx), rhs.load_scalar(fx)];
+                let ret = fx.lib_call(
+                    name,
+                    vec![AbiParam::new(types::I128), AbiParam::new(types::I128)],
+                    vec![AbiParam::new(types::I64X2)],
+                    &args,
+                )[0];
+                // FIXME(bytecodealliance/wasmtime#6104) use bitcast instead of store to get from i64x2 to i128
+                let ret_place = CPlace::new_stack_slot(fx, lhs.layout());
+                ret_place.to_ptr().store(fx, ret, MemFlags::trusted());
+                Some(ret_place.to_cvalue(fx))
+            } else {
+                let args = [lhs.load_scalar(fx), rhs.load_scalar(fx)];
+                let ret_val = fx.lib_call(
+                    name,
+                    vec![AbiParam::new(types::I128), AbiParam::new(types::I128)],
+                    vec![AbiParam::new(types::I128)],
+                    &args,
+                )[0];
+                Some(CValue::by_val(ret_val, lhs.layout()))
+            }
+        }
+        BinOp::Lt | BinOp::Le | BinOp::Eq | BinOp::Ge | BinOp::Gt | BinOp::Ne | BinOp::Cmp => None,
+        BinOp::Shl | BinOp::ShlUnchecked | BinOp::Shr | BinOp::ShrUnchecked => None,
+    }
+}
+
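+/// Like [`maybe_codegen`], but for checked arithmetic: on the 128-bit path the returned
+/// value is a `(result, overflow_flag)` pair matching the layout of the corresponding
+/// `(T, bool)` tuple.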
+pub(crate) fn maybe_codegen_checked<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    lhs: CValue<'tcx>,
+    rhs: CValue<'tcx>,
+) -> Option<CValue<'tcx>> {
+    if lhs.layout().ty != fx.tcx.types.u128
+        && lhs.layout().ty != fx.tcx.types.i128
+        && rhs.layout().ty != fx.tcx.types.u128
+        && rhs.layout().ty != fx.tcx.types.i128
+    {
+        return None;
+    }
+
+    let is_signed = type_sign(lhs.layout().ty);
+
+    match bin_op {
+        BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => unreachable!(),
+        BinOp::Mul if is_signed => {
+            let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]);
+            let oflow = CPlace::new_stack_slot(fx, fx.layout_of(fx.tcx.types.i32));
+            let lhs = lhs.load_scalar(fx);
+            let rhs = rhs.load_scalar(fx);
+            let oflow_ptr = oflow.to_ptr().get_addr(fx);
+            let res = fx.lib_call_unadjusted(
+                "__muloti4",
+                vec![
+                    AbiParam::new(types::I128),
+                    AbiParam::new(types::I128),
+                    AbiParam::new(fx.pointer_type),
+                ],
+                vec![AbiParam::new(types::I128)],
+                &[lhs, rhs, oflow_ptr],
+            )[0];
+            let oflow = oflow.to_cvalue(fx).load_scalar(fx);
+            let oflow = fx.bcx.ins().ireduce(types::I8, oflow);
+            Some(CValue::by_val_pair(res, oflow, fx.layout_of(out_ty)))
+        }
+        BinOp::Add | BinOp::Sub | BinOp::Mul => {
+            let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]);
+            let out_place = CPlace::new_stack_slot(fx, fx.layout_of(out_ty));
+            let param_types = vec![
+                AbiParam::special(fx.pointer_type, ArgumentPurpose::StructReturn),
+                AbiParam::new(types::I128),
+                AbiParam::new(types::I128),
+            ];
+            let args = [out_place.to_ptr().get_addr(fx), lhs.load_scalar(fx), rhs.load_scalar(fx)];
+            let name = match (bin_op, is_signed) {
+                (BinOp::Add, false) => "__rust_u128_addo",
+                (BinOp::Add, true) => "__rust_i128_addo",
+                (BinOp::Sub, false) => "__rust_u128_subo",
+                (BinOp::Sub, true) => "__rust_i128_subo",
+                (BinOp::Mul, false) => "__rust_u128_mulo",
+                _ => unreachable!(),
+            };
+            fx.lib_call(name, param_types, vec![], &args);
+            Some(out_place.to_cvalue(fx))
+        }
+        BinOp::AddUnchecked | BinOp::SubUnchecked | BinOp::MulUnchecked => unreachable!(),
+        BinOp::Offset => unreachable!("offset should only be used on pointers, not 128bit ints"),
+        BinOp::Div | BinOp::Rem => unreachable!(),
+        BinOp::Cmp => unreachable!(),
+        BinOp::Lt | BinOp::Le | BinOp::Eq | BinOp::Ge | BinOp::Gt | BinOp::Ne => unreachable!(),
+        BinOp::Shl | BinOp::ShlUnchecked | BinOp::Shr | BinOp::ShrUnchecked => unreachable!(),
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
new file mode 100644
index 00000000000..21d0cd2d30f
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -0,0 +1,518 @@
+use cranelift_codegen::isa::TargetFrontendConfig;
+use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
+use rustc_index::IndexVec;
+use rustc_middle::ty::layout::{
+    self, FnAbiError, FnAbiOfHelpers, FnAbiRequest, LayoutError, LayoutOfHelpers,
+};
+use rustc_middle::ty::TypeFoldable;
+use rustc_span::source_map::Spanned;
+use rustc_target::abi::call::FnAbi;
+use rustc_target::abi::{Float, Integer, Primitive};
+use rustc_target::spec::{HasTargetSpec, Target};
+
+use crate::constant::ConstantCx;
+use crate::debuginfo::FunctionDebugContext;
+use crate::prelude::*;
+
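+/// The Cranelift integer type matching the target's pointer width, e.g. `types::I64`
+/// on a 64-bit target.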
+pub(crate) fn pointer_ty(tcx: TyCtxt<'_>) -> types::Type {
+    match tcx.data_layout.pointer_size.bits() {
+        16 => types::I16,
+        32 => types::I32,
+        64 => types::I64,
+        bits => bug!("ptr_sized_integer: unknown pointer bit size {}", bits),
+    }
+}
+
+pub(crate) fn scalar_to_clif_type(tcx: TyCtxt<'_>, scalar: Scalar) -> Type {
+    match scalar.primitive() {
+        Primitive::Int(int, _sign) => match int {
+            Integer::I8 => types::I8,
+            Integer::I16 => types::I16,
+            Integer::I32 => types::I32,
+            Integer::I64 => types::I64,
+            Integer::I128 => types::I128,
+        },
+        Primitive::Float(float) => match float {
+            Float::F16 => unimplemented!("f16_f128"),
+            Float::F32 => types::F32,
+            Float::F64 => types::F64,
+            Float::F128 => unimplemented!("f16_f128"),
+        },
+        // FIXME(erikdesjardins): handle non-default addrspace ptr sizes
+        Primitive::Pointer(_) => pointer_ty(tcx),
+    }
+}
+
+fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<types::Type> {
+    Some(match ty.kind() {
+        ty::Bool => types::I8,
+        ty::Uint(size) => match size {
+            UintTy::U8 => types::I8,
+            UintTy::U16 => types::I16,
+            UintTy::U32 => types::I32,
+            UintTy::U64 => types::I64,
+            UintTy::U128 => types::I128,
+            UintTy::Usize => pointer_ty(tcx),
+        },
+        ty::Int(size) => match size {
+            IntTy::I8 => types::I8,
+            IntTy::I16 => types::I16,
+            IntTy::I32 => types::I32,
+            IntTy::I64 => types::I64,
+            IntTy::I128 => types::I128,
+            IntTy::Isize => pointer_ty(tcx),
+        },
+        ty::Char => types::I32,
+        ty::Float(size) => match size {
+            FloatTy::F16 => unimplemented!("f16_f128"),
+            FloatTy::F32 => types::F32,
+            FloatTy::F64 => types::F64,
+            FloatTy::F128 => unimplemented!("f16_f128"),
+        },
+        ty::FnPtr(_) => pointer_ty(tcx),
+        ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => {
+            if has_ptr_meta(tcx, *pointee_ty) {
+                return None;
+            } else {
+                pointer_ty(tcx)
+            }
+        }
+        ty::Param(_) => bug!("ty param {:?}", ty),
+        _ => return None,
+    })
+}
+
+fn clif_pair_type_from_ty<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    ty: Ty<'tcx>,
+) -> Option<(types::Type, types::Type)> {
+    Some(match ty.kind() {
+        ty::Tuple(types) if types.len() == 2 => {
+            (clif_type_from_ty(tcx, types[0])?, clif_type_from_ty(tcx, types[1])?)
+        }
+        ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => {
+            if has_ptr_meta(tcx, *pointee_ty) {
+                (pointer_ty(tcx), pointer_ty(tcx))
+            } else {
+                return None;
+            }
+        }
+        _ => return None,
+    })
+}
+
+/// Is a pointer to this type a fat ptr?
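+///
+/// Pointers to `str`, slices and trait objects carry metadata (a length or a vtable
+/// pointer); pointers to sized types and to `extern type`s are thin.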
+pub(crate) fn has_ptr_meta<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> bool {
+    if ty.is_sized(tcx, ParamEnv::reveal_all()) {
+        return false;
+    }
+
+    let tail = tcx.struct_tail_erasing_lifetimes(ty, ParamEnv::reveal_all());
+    match tail.kind() {
+        ty::Foreign(..) => false,
+        ty::Str | ty::Slice(..) | ty::Dynamic(..) => true,
+        _ => bug!("unexpected unsized tail: {:?}", tail),
+    }
+}
+
+pub(crate) fn codegen_icmp_imm(
+    fx: &mut FunctionCx<'_, '_, '_>,
+    intcc: IntCC,
+    lhs: Value,
+    rhs: i128,
+) -> Value {
+    let lhs_ty = fx.bcx.func.dfg.value_type(lhs);
+    if lhs_ty == types::I128 {
+        // FIXME legalize `icmp_imm.i128` in Cranelift
+
+        let (lhs_lsb, lhs_msb) = fx.bcx.ins().isplit(lhs);
+        let (rhs_lsb, rhs_msb) = (rhs as u128 as u64 as i64, (rhs as u128 >> 64) as u64 as i64);
+
+        match intcc {
+            IntCC::Equal => {
+                let lsb_eq = fx.bcx.ins().icmp_imm(IntCC::Equal, lhs_lsb, rhs_lsb);
+                let msb_eq = fx.bcx.ins().icmp_imm(IntCC::Equal, lhs_msb, rhs_msb);
+                fx.bcx.ins().band(lsb_eq, msb_eq)
+            }
+            IntCC::NotEqual => {
+                let lsb_ne = fx.bcx.ins().icmp_imm(IntCC::NotEqual, lhs_lsb, rhs_lsb);
+                let msb_ne = fx.bcx.ins().icmp_imm(IntCC::NotEqual, lhs_msb, rhs_msb);
+                fx.bcx.ins().bor(lsb_ne, msb_ne)
+            }
+            _ => {
+                // if msb_eq {
+                //     lsb_cc
+                // } else {
+                //     msb_cc
+                // }
+
+                let msb_eq = fx.bcx.ins().icmp_imm(IntCC::Equal, lhs_msb, rhs_msb);
+                // The low halves must be compared as unsigned regardless of the
+                // signedness of `intcc`; the sign of a 128-bit value lives entirely
+                // in its high half.
+                let lsb_cc = fx.bcx.ins().icmp_imm(intcc.unsigned(), lhs_lsb, rhs_lsb);
+                let msb_cc = fx.bcx.ins().icmp_imm(intcc, lhs_msb, rhs_msb);
+
+                fx.bcx.ins().select(msb_eq, lsb_cc, msb_cc)
+            }
+        }
+    } else {
+        let rhs = rhs as i64; // Truncates on purpose in case rhs is actually an unsigned value
+        fx.bcx.ins().icmp_imm(intcc, lhs, rhs)
+    }
+}
+
+pub(crate) fn codegen_bitcast(fx: &mut FunctionCx<'_, '_, '_>, dst_ty: Type, val: Value) -> Value {
+    let mut flags = MemFlags::new();
+    flags.set_endianness(match fx.tcx.data_layout.endian {
+        rustc_target::abi::Endian::Big => cranelift_codegen::ir::Endianness::Big,
+        rustc_target::abi::Endian::Little => cranelift_codegen::ir::Endianness::Little,
+    });
+    fx.bcx.ins().bitcast(dst_ty, flags, val)
+}
+
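+/// Materialize a zero value of type `ty`. `iconst` only supports integer types up to
+/// 64 bits wide, so an `i128` zero is assembled from two `i64` halves via `iconcat`.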
+pub(crate) fn type_zero_value(bcx: &mut FunctionBuilder<'_>, ty: Type) -> Value {
+    if ty == types::I128 {
+        let zero = bcx.ins().iconst(types::I64, 0);
+        bcx.ins().iconcat(zero, zero)
+    } else {
+        bcx.ins().iconst(ty, 0)
+    }
+}
+
+pub(crate) fn type_min_max_value(
+    bcx: &mut FunctionBuilder<'_>,
+    ty: Type,
+    signed: bool,
+) -> (Value, Value) {
+    assert!(ty.is_int());
+
+    if ty == types::I128 {
+        if signed {
+            let min = i128::MIN as u128;
+            let min_lsb = bcx.ins().iconst(types::I64, min as u64 as i64);
+            let min_msb = bcx.ins().iconst(types::I64, (min >> 64) as u64 as i64);
+            let min = bcx.ins().iconcat(min_lsb, min_msb);
+
+            let max = i128::MAX as u128;
+            let max_lsb = bcx.ins().iconst(types::I64, max as u64 as i64);
+            let max_msb = bcx.ins().iconst(types::I64, (max >> 64) as u64 as i64);
+            let max = bcx.ins().iconcat(max_lsb, max_msb);
+
+            return (min, max);
+        } else {
+            let min_half = bcx.ins().iconst(types::I64, 0);
+            let min = bcx.ins().iconcat(min_half, min_half);
+
+            let max_half = bcx.ins().iconst(types::I64, u64::MAX as i64);
+            let max = bcx.ins().iconcat(max_half, max_half);
+
+            return (min, max);
+        }
+    }
+
+    let min = match (ty, signed) {
+        (types::I8, false) | (types::I16, false) | (types::I32, false) | (types::I64, false) => {
+            0i64
+        }
+        (types::I8, true) => i64::from(i8::MIN as u8),
+        (types::I16, true) => i64::from(i16::MIN as u16),
+        (types::I32, true) => i64::from(i32::MIN as u32),
+        (types::I64, true) => i64::MIN,
+        _ => unreachable!(),
+    };
+
+    let max = match (ty, signed) {
+        (types::I8, false) => i64::from(u8::MAX),
+        (types::I16, false) => i64::from(u16::MAX),
+        (types::I32, false) => i64::from(u32::MAX),
+        (types::I64, false) => u64::MAX as i64,
+        (types::I8, true) => i64::from(i8::MAX as u8),
+        (types::I16, true) => i64::from(i16::MAX as u16),
+        (types::I32, true) => i64::from(i32::MAX as u32),
+        (types::I64, true) => i64::MAX,
+        _ => unreachable!(),
+    };
+
+    let (min, max) = (bcx.ins().iconst(ty, min), bcx.ins().iconst(ty, max));
+
+    (min, max)
+}
+
+pub(crate) fn type_sign(ty: Ty<'_>) -> bool {
+    match ty.kind() {
+        ty::Ref(..) | ty::RawPtr(..) | ty::FnPtr(..) | ty::Char | ty::Uint(..) | ty::Bool => false,
+        ty::Int(..) => true,
+        ty::Float(..) => false, // `signed` is unused for floats
+        _ => panic!("{}", ty),
+    }
+}
+
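+/// Define an exported function `wrapper_name` with signature `sig` that simply forwards
+/// all of its arguments to the imported function `callee_name` and returns its results.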
+pub(crate) fn create_wrapper_function(
+    module: &mut dyn Module,
+    unwind_context: &mut UnwindContext,
+    sig: Signature,
+    wrapper_name: &str,
+    callee_name: &str,
+) {
+    let wrapper_func_id = module.declare_function(wrapper_name, Linkage::Export, &sig).unwrap();
+    let callee_func_id = module.declare_function(callee_name, Linkage::Import, &sig).unwrap();
+
+    let mut ctx = Context::new();
+    ctx.func.signature = sig;
+    {
+        let mut func_ctx = FunctionBuilderContext::new();
+        let mut bcx = FunctionBuilder::new(&mut ctx.func, &mut func_ctx);
+
+        let block = bcx.create_block();
+        bcx.switch_to_block(block);
+        let func = &mut bcx.func.stencil;
+        let args = func
+            .signature
+            .params
+            .iter()
+            .map(|param| func.dfg.append_block_param(block, param.value_type))
+            .collect::<Vec<Value>>();
+
+        let callee_func_ref = module.declare_func_in_func(callee_func_id, &mut bcx.func);
+        let call_inst = bcx.ins().call(callee_func_ref, &args);
+        let results = bcx.inst_results(call_inst).to_vec(); // Clone to prevent borrow error
+
+        bcx.ins().return_(&results);
+        bcx.seal_all_blocks();
+        bcx.finalize();
+    }
+    module.define_function(wrapper_func_id, &mut ctx).unwrap();
+    unwind_context.add_function(wrapper_func_id, &ctx, module.isa());
+}
+
+pub(crate) struct FunctionCx<'m, 'clif, 'tcx: 'm> {
+    pub(crate) cx: &'clif mut crate::CodegenCx,
+    pub(crate) module: &'m mut dyn Module,
+    pub(crate) tcx: TyCtxt<'tcx>,
+    pub(crate) target_config: TargetFrontendConfig, // Cached from module
+    pub(crate) pointer_type: Type,                  // Cached from module
+    pub(crate) constants_cx: ConstantCx,
+    pub(crate) func_debug_cx: Option<FunctionDebugContext>,
+
+    pub(crate) instance: Instance<'tcx>,
+    pub(crate) symbol_name: String,
+    pub(crate) mir: &'tcx Body<'tcx>,
+    pub(crate) fn_abi: &'tcx FnAbi<'tcx, Ty<'tcx>>,
+
+    pub(crate) bcx: FunctionBuilder<'clif>,
+    pub(crate) block_map: IndexVec<BasicBlock, Block>,
+    pub(crate) local_map: IndexVec<Local, CPlace<'tcx>>,
+
+    /// When `#[track_caller]` is used, the implicit caller location is stored in this variable.
+    pub(crate) caller_location: Option<CValue<'tcx>>,
+
+    pub(crate) clif_comments: crate::pretty_clif::CommentWriter,
+
+    /// This should only be accessed by `CPlace::new_var`.
+    pub(crate) next_ssa_var: u32,
+}
+
+impl<'tcx> LayoutOfHelpers<'tcx> for FunctionCx<'_, '_, 'tcx> {
+    type LayoutOfResult = TyAndLayout<'tcx>;
+
+    #[inline]
+    fn handle_layout_err(&self, err: LayoutError<'tcx>, span: Span, ty: Ty<'tcx>) -> ! {
+        RevealAllLayoutCx(self.tcx).handle_layout_err(err, span, ty)
+    }
+}
+
+impl<'tcx> FnAbiOfHelpers<'tcx> for FunctionCx<'_, '_, 'tcx> {
+    type FnAbiOfResult = &'tcx FnAbi<'tcx, Ty<'tcx>>;
+
+    #[inline]
+    fn handle_fn_abi_err(
+        &self,
+        err: FnAbiError<'tcx>,
+        span: Span,
+        fn_abi_request: FnAbiRequest<'tcx>,
+    ) -> ! {
+        RevealAllLayoutCx(self.tcx).handle_fn_abi_err(err, span, fn_abi_request)
+    }
+}
+
+impl<'tcx> layout::HasTyCtxt<'tcx> for FunctionCx<'_, '_, 'tcx> {
+    fn tcx<'b>(&'b self) -> TyCtxt<'tcx> {
+        self.tcx
+    }
+}
+
+impl<'tcx> rustc_target::abi::HasDataLayout for FunctionCx<'_, '_, 'tcx> {
+    fn data_layout(&self) -> &rustc_target::abi::TargetDataLayout {
+        &self.tcx.data_layout
+    }
+}
+
+impl<'tcx> layout::HasParamEnv<'tcx> for FunctionCx<'_, '_, 'tcx> {
+    fn param_env(&self) -> ParamEnv<'tcx> {
+        ParamEnv::reveal_all()
+    }
+}
+
+impl<'tcx> HasTargetSpec for FunctionCx<'_, '_, 'tcx> {
+    fn target_spec(&self) -> &Target {
+        &self.tcx.sess.target
+    }
+}
+
+impl<'tcx> FunctionCx<'_, '_, 'tcx> {
+    pub(crate) fn monomorphize<T>(&self, value: T) -> T
+    where
+        T: TypeFoldable<TyCtxt<'tcx>> + Copy,
+    {
+        self.instance.instantiate_mir_and_normalize_erasing_regions(
+            self.tcx,
+            ty::ParamEnv::reveal_all(),
+            ty::EarlyBinder::bind(value),
+        )
+    }
+
+    pub(crate) fn clif_type(&self, ty: Ty<'tcx>) -> Option<Type> {
+        clif_type_from_ty(self.tcx, ty)
+    }
+
+    pub(crate) fn clif_pair_type(&self, ty: Ty<'tcx>) -> Option<(Type, Type)> {
+        clif_pair_type_from_ty(self.tcx, ty)
+    }
+
+    pub(crate) fn get_block(&self, bb: BasicBlock) -> Block {
+        *self.block_map.get(bb).unwrap()
+    }
+
+    pub(crate) fn get_local_place(&mut self, local: Local) -> CPlace<'tcx> {
+        *self.local_map.get(local).unwrap_or_else(|| {
+            panic!("Local {:?} doesn't exist", local);
+        })
+    }
+
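+    /// Allocate a stack slot of at least `size` bytes, aligned to `align` bytes.
+    /// Alignments above what the target ABI guarantees for the stack are handled by
+    /// over-allocating and realigning the returned pointer at runtime.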
+    pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer {
+        let abi_align = if self.tcx.sess.target.arch == "s390x" { 8 } else { 16 };
+        if align <= abi_align {
+            let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
+                kind: StackSlotKind::ExplicitSlot,
+                // FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
+                // a way to specify stack slot alignment.
+                size: (size + abi_align - 1) / abi_align * abi_align,
+            });
+            Pointer::stack_slot(stack_slot)
+        } else {
+            // Alignment is too big to handle using the above hack. Dynamically realign a stack slot
+            // instead. This wastes some space for the realignment.
+            let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
+                kind: StackSlotKind::ExplicitSlot,
+                // FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
+                // a way to specify stack slot alignment.
+                size: (size + align) / abi_align * abi_align,
+            });
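+            // Round the base pointer up to the requested alignment:
+            // realigned = base + (align - base % align), which is always a multiple
+            // of `align`.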
+            let base_ptr = self.bcx.ins().stack_addr(self.pointer_type, stack_slot, 0);
+            let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align));
+            let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align));
+            Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset))
+        }
+    }
+
+    pub(crate) fn set_debug_loc(&mut self, source_info: mir::SourceInfo) {
+        if let Some(debug_context) = &mut self.cx.debug_context {
+            let (file_id, line, column) =
+                debug_context.get_span_loc(self.tcx, self.mir.span, source_info.span);
+
+            let source_loc =
+                self.func_debug_cx.as_mut().unwrap().add_dbg_loc(file_id, line, column);
+            self.bcx.set_srcloc(source_loc);
+        }
+    }
+
+    pub(crate) fn get_caller_location(&mut self, source_info: mir::SourceInfo) -> CValue<'tcx> {
+        self.mir.caller_location_span(source_info, self.caller_location, self.tcx, |span| {
+            let const_loc = self.tcx.span_as_caller_location(span);
+            crate::constant::codegen_const_value(self, const_loc, self.tcx.caller_location_ty())
+        })
+    }
+
+    pub(crate) fn anonymous_str(&mut self, msg: &str) -> Value {
+        let mut data = DataDescription::new();
+        data.define(msg.as_bytes().to_vec().into_boxed_slice());
+        let msg_id = self.module.declare_anonymous_data(false, false).unwrap();
+
+        // Ignore DuplicateDefinition error, as the data will be the same
+        let _ = self.module.define_data(msg_id, &data);
+
+        let local_msg_id = self.module.declare_data_in_func(msg_id, self.bcx.func);
+        if self.clif_comments.enabled() {
+            self.add_comment(local_msg_id, msg);
+        }
+        self.bcx.ins().global_value(self.pointer_type, local_msg_id)
+    }
+}
+
+pub(crate) struct RevealAllLayoutCx<'tcx>(pub(crate) TyCtxt<'tcx>);
+
+impl<'tcx> LayoutOfHelpers<'tcx> for RevealAllLayoutCx<'tcx> {
+    type LayoutOfResult = TyAndLayout<'tcx>;
+
+    #[inline]
+    fn handle_layout_err(&self, err: LayoutError<'tcx>, span: Span, ty: Ty<'tcx>) -> ! {
+        if let LayoutError::SizeOverflow(_) | LayoutError::ReferencesError(_) = err {
+            self.0.sess.dcx().span_fatal(span, err.to_string())
+        } else {
+            self.0
+                .sess
+                .dcx()
+                .span_fatal(span, format!("failed to get layout for `{}`: {}", ty, err))
+        }
+    }
+}
+
+impl<'tcx> FnAbiOfHelpers<'tcx> for RevealAllLayoutCx<'tcx> {
+    type FnAbiOfResult = &'tcx FnAbi<'tcx, Ty<'tcx>>;
+
+    #[inline]
+    fn handle_fn_abi_err(
+        &self,
+        err: FnAbiError<'tcx>,
+        span: Span,
+        fn_abi_request: FnAbiRequest<'tcx>,
+    ) -> ! {
+        if let FnAbiError::Layout(LayoutError::SizeOverflow(_)) = err {
+            self.0.sess.dcx().emit_fatal(Spanned { span, node: err })
+        } else {
+            match fn_abi_request {
+                FnAbiRequest::OfFnPtr { sig, extra_args } => {
+                    span_bug!(span, "`fn_abi_of_fn_ptr({sig}, {extra_args:?})` failed: {err:?}");
+                }
+                FnAbiRequest::OfInstance { instance, extra_args } => {
+                    span_bug!(
+                        span,
+                        "`fn_abi_of_instance({instance}, {extra_args:?})` failed: {err:?}"
+                    );
+                }
+            }
+        }
+    }
+}
+
+impl<'tcx> layout::HasTyCtxt<'tcx> for RevealAllLayoutCx<'tcx> {
+    fn tcx<'b>(&'b self) -> TyCtxt<'tcx> {
+        self.0
+    }
+}
+
+impl<'tcx> rustc_target::abi::HasDataLayout for RevealAllLayoutCx<'tcx> {
+    fn data_layout(&self) -> &rustc_target::abi::TargetDataLayout {
+        &self.0.data_layout
+    }
+}
+
+impl<'tcx> layout::HasParamEnv<'tcx> for RevealAllLayoutCx<'tcx> {
+    fn param_env(&self) -> ParamEnv<'tcx> {
+        ParamEnv::reveal_all()
+    }
+}
+
+impl<'tcx> HasTargetSpec for RevealAllLayoutCx<'tcx> {
+    fn target_spec(&self) -> &Target {
+        &self.0.sess.target
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs b/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs
new file mode 100644
index 00000000000..f3b963200a0
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs
@@ -0,0 +1,76 @@
+#[cfg(all(unix, feature = "jit"))]
+use std::ffi::c_int;
+#[cfg(feature = "jit")]
+use std::ffi::c_void;
+
+// FIXME replace with core::ffi::c_size_t once stabilized
+#[allow(non_camel_case_types)]
+#[cfg(feature = "jit")]
+type size_t = usize;
+
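+// Declares the listed functions as `extern "C"` and generates a `$register` function
+// that registers each symbol with the JIT builder, so that JITed code can resolve
+// these symbols by name. Everything is gated behind the `jit` feature.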
+macro_rules! builtin_functions {
+    (
+        $register:ident;
+        $(
+            $(#[$attr:meta])?
+            fn $name:ident($($arg_name:ident: $arg_ty:ty),*) -> $ret_ty:ty;
+        )*
+    ) => {
+        #[cfg(feature = "jit")]
+        #[allow(improper_ctypes)]
+        extern "C" {
+            $(
+                $(#[$attr])?
+                fn $name($($arg_name: $arg_ty),*) -> $ret_ty;
+            )*
+        }
+
+        #[cfg(feature = "jit")]
+        pub(crate) fn $register(builder: &mut cranelift_jit::JITBuilder) {
+            for (name, val) in [$($(#[$attr])? (stringify!($name), $name as *const u8)),*] {
+                builder.symbol(name, val);
+            }
+        }
+    };
+}
+
+builtin_functions! {
+    register_functions_for_jit;
+
+    // integers
+    fn __multi3(a: i128, b: i128) -> i128;
+    fn __muloti4(n: i128, d: i128, oflow: &mut i32) -> i128;
+    fn __udivti3(n: u128, d: u128) -> u128;
+    fn __divti3(n: i128, d: i128) -> i128;
+    fn __umodti3(n: u128, d: u128) -> u128;
+    fn __modti3(n: i128, d: i128) -> i128;
+    fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool);
+    fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool);
+    fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool);
+    fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool);
+    fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool);
+    fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool);
+
+    // floats
+    fn __floattisf(i: i128) -> f32;
+    fn __floattidf(i: i128) -> f64;
+    fn __floatuntisf(i: u128) -> f32;
+    fn __floatuntidf(i: u128) -> f64;
+    fn __fixsfti(f: f32) -> i128;
+    fn __fixdfti(f: f64) -> i128;
+    fn __fixunssfti(f: f32) -> u128;
+    fn __fixunsdfti(f: f64) -> u128;
+
+    // allocator
+    // NOTE: These need to be mentioned here despite not being part of compiler_builtins because
+    // newer glibc resolves dlsym("malloc") to libc.so despite the override in the rustc binary to
+    // use jemalloc. Libraries opened with dlopen still get the jemalloc version, causing multiple
+    // allocators to be mixed, resulting in a crash.
+    fn calloc(nobj: size_t, size: size_t) -> *mut c_void;
+    #[cfg(unix)]
+    fn posix_memalign(memptr: *mut *mut c_void, align: size_t, size: size_t) -> c_int;
+    fn malloc(size: size_t) -> *mut c_void;
+    fn realloc(p: *mut c_void, size: size_t) -> *mut c_void;
+    fn free(p: *mut c_void) -> ();
+}
diff --git a/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
new file mode 100644
index 00000000000..a73860cf18b
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/concurrency_limiter.rs
@@ -0,0 +1,206 @@
+use std::sync::{Arc, Condvar, Mutex};
+
+use jobserver::HelperThread;
+use rustc_session::Session;
+
+// FIXME don't panic when a worker thread panics
+
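+// At most one codegen job runs per jobserver token held. The limiter starts out with
+// only the implicit token; once all held tokens are in use, `acquire` requests another
+// one from the helper thread and blocks on the condvar until it arrives.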
+pub(super) struct ConcurrencyLimiter {
+    helper_thread: Option<Mutex<HelperThread>>,
+    state: Arc<Mutex<state::ConcurrencyLimiterState>>,
+    available_token_condvar: Arc<Condvar>,
+    finished: bool,
+}
+
+impl ConcurrencyLimiter {
+    pub(super) fn new(sess: &Session, pending_jobs: usize) -> Self {
+        let state = Arc::new(Mutex::new(state::ConcurrencyLimiterState::new(pending_jobs)));
+        let available_token_condvar = Arc::new(Condvar::new());
+
+        let state_helper = state.clone();
+        let available_token_condvar_helper = available_token_condvar.clone();
+        let helper_thread = sess
+            .jobserver
+            .clone()
+            .into_helper_thread(move |token| {
+                let mut state = state_helper.lock().unwrap();
+                match token {
+                    Ok(token) => {
+                        state.add_new_token(token);
+                        available_token_condvar_helper.notify_one();
+                    }
+                    Err(err) => {
+                        state.poison(format!("failed to acquire jobserver token: {}", err));
+                        // Notify all threads waiting for a token to give them a chance to
+                        // gracefully exit.
+                        available_token_condvar_helper.notify_all();
+                    }
+                }
+            })
+            .unwrap();
+        ConcurrencyLimiter {
+            helper_thread: Some(Mutex::new(helper_thread)),
+            state,
+            available_token_condvar,
+            finished: false,
+        }
+    }
+
+    pub(super) fn acquire(&self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken {
+        let mut state = self.state.lock().unwrap();
+        loop {
+            state.assert_invariants();
+
+            match state.try_start_job() {
+                Ok(true) => {
+                    return ConcurrencyLimiterToken {
+                        state: self.state.clone(),
+                        available_token_condvar: self.available_token_condvar.clone(),
+                    };
+                }
+                Ok(false) => {}
+                Err(err) => {
+                    // An error happened when acquiring the token. Raise it as a fatal error.
+                    // Make sure to drop the mutex guard first to prevent poisoning the mutex.
+                    drop(state);
+                    if let Some(err) = err {
+                        dcx.fatal(err);
+                    } else {
+                        // The error was already emitted, but compilation continued. Raise a silent
+                        // fatal error.
+                        rustc_errors::FatalError.raise();
+                    }
+                }
+            }
+
+            self.helper_thread.as_ref().unwrap().lock().unwrap().request_token();
+            state = self.available_token_condvar.wait(state).unwrap();
+        }
+    }
+
+    pub(crate) fn finished(mut self) {
+        self.helper_thread.take();
+
+        // Assert that all jobs have finished
+        let state = Mutex::get_mut(Arc::get_mut(&mut self.state).unwrap()).unwrap();
+        state.assert_done();
+
+        self.finished = true;
+    }
+}
+
+impl Drop for ConcurrencyLimiter {
+    fn drop(&mut self) {
+        if !self.finished && !std::thread::panicking() {
+            panic!("Forgot to call finished() on ConcurrencyLimiter");
+        }
+    }
+}
+
+#[derive(Debug)]
+pub(super) struct ConcurrencyLimiterToken {
+    state: Arc<Mutex<state::ConcurrencyLimiterState>>,
+    available_token_condvar: Arc<Condvar>,
+}
+
+impl Drop for ConcurrencyLimiterToken {
+    fn drop(&mut self) {
+        let mut state = self.state.lock().unwrap();
+        state.job_finished();
+        self.available_token_condvar.notify_one();
+    }
+}
+
+mod state {
+    use jobserver::Acquired;
+
+    #[derive(Debug)]
+    pub(super) struct ConcurrencyLimiterState {
+        pending_jobs: usize,
+        active_jobs: usize,
+
+        poisoned: bool,
+        stored_error: Option<String>,
+
+        // None is used to represent the implicit token, Some to represent explicit tokens
+        tokens: Vec<Option<Acquired>>,
+    }
+
+    impl ConcurrencyLimiterState {
+        pub(super) fn new(pending_jobs: usize) -> Self {
+            ConcurrencyLimiterState {
+                pending_jobs,
+                active_jobs: 0,
+                poisoned: false,
+                stored_error: None,
+                tokens: vec![None],
+            }
+        }
+
+        pub(super) fn assert_invariants(&self) {
+            // There must be no excess active jobs
+            assert!(self.active_jobs <= self.pending_jobs);
+
+            // There may not be more active jobs than there are tokens
+            assert!(self.active_jobs <= self.tokens.len());
+        }
+
+        pub(super) fn assert_done(&self) {
+            assert_eq!(self.pending_jobs, 0);
+            assert_eq!(self.active_jobs, 0);
+        }
+
+        pub(super) fn add_new_token(&mut self, token: Acquired) {
+            self.tokens.push(Some(token));
+            self.drop_excess_capacity();
+        }
+
+        pub(super) fn try_start_job(&mut self) -> Result<bool, Option<String>> {
+            if self.poisoned {
+                return Err(self.stored_error.take());
+            }
+
+            if self.active_jobs < self.tokens.len() {
+                // Using existing token
+                self.job_started();
+                return Ok(true);
+            }
+
+            Ok(false)
+        }
+
+        pub(super) fn job_started(&mut self) {
+            self.assert_invariants();
+            self.active_jobs += 1;
+            self.drop_excess_capacity();
+            self.assert_invariants();
+        }
+
+        pub(super) fn job_finished(&mut self) {
+            self.assert_invariants();
+            self.pending_jobs -= 1;
+            self.active_jobs -= 1;
+            self.assert_invariants();
+            self.drop_excess_capacity();
+            self.assert_invariants();
+        }
+
+        pub(super) fn poison(&mut self, error: String) {
+            self.poisoned = true;
+            self.stored_error = Some(error);
+        }
+
+        fn drop_excess_capacity(&mut self) {
+            self.assert_invariants();
+
+            // Drop all tokens that can never be used anymore
+            self.tokens.truncate(std::cmp::max(self.pending_jobs, 1));
+
+            // Keep some excess tokens to satisfy requests faster
+            const MAX_EXTRA_CAPACITY: usize = 2;
+            self.tokens.truncate(std::cmp::max(self.active_jobs + MAX_EXTRA_CAPACITY, 1));
+
+            self.assert_invariants();
+        }
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/config.rs b/compiler/rustc_codegen_cranelift/src/config.rs
new file mode 100644
index 00000000000..12bce680d9e
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/config.rs
@@ -0,0 +1,109 @@
+use std::env;
+use std::str::FromStr;
+
+fn bool_env_var(key: &str) -> bool {
+    env::var(key).as_deref() == Ok("1")
+}
+
+/// The mode to use for compilation.
+#[derive(Copy, Clone, Debug)]
+pub enum CodegenMode {
+    /// AOT compile the crate. This is the default.
+    Aot,
+    /// JIT compile and execute the crate.
+    Jit,
+    /// JIT compile and execute the crate, but only compile functions the first time they are used.
+    JitLazy,
+}
+
+impl FromStr for CodegenMode {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "aot" => Ok(CodegenMode::Aot),
+            "jit" => Ok(CodegenMode::Jit),
+            "jit-lazy" => Ok(CodegenMode::JitLazy),
+            _ => Err(format!("Unknown codegen mode `{}`", s)),
+        }
+    }
+}
+
+/// Configuration of cg_clif as passed in through `-Cllvm-args` and various env vars.
+#[derive(Clone, Debug)]
+pub struct BackendConfig {
+    /// Whether the crate should be AOT compiled or JIT executed.
+    ///
+    /// Defaults to AOT compilation. Can be set using `-Cllvm-args=mode=...`.
+    pub codegen_mode: CodegenMode,
+
+    /// When JIT mode is enabled, pass these arguments to the program.
+    ///
+    /// Defaults to the value of `CG_CLIF_JIT_ARGS`.
+    pub jit_args: Vec<String>,
+
+    /// Enable the Cranelift ir verifier for all compilation passes. If not set, it will only run
+    /// once before the clif ir is passed to Cranelift for compilation.
+    ///
+    /// Defaults to true when the `CG_CLIF_ENABLE_VERIFIER` env var is set to 1 or when cg_clif
+    /// is compiled with debug assertions enabled, and to false otherwise. Can be set using
+    /// `-Cllvm-args=enable_verifier=...`.
+    pub enable_verifier: bool,
+
+    /// Don't cache object files in the incremental cache. Useful during development of cg_clif
+    /// to make it possible to use incremental mode for all analyses performed by rustc without
+    /// caching object files whose contents would have been changed by a change to cg_clif.
+    ///
+    /// Defaults to true when the `CG_CLIF_DISABLE_INCR_CACHE` env var is set to 1 and to false
+    /// otherwise. Can be set using `-Cllvm-args=disable_incr_cache=...`.
+    pub disable_incr_cache: bool,
+}
+
+impl Default for BackendConfig {
+    fn default() -> Self {
+        BackendConfig {
+            codegen_mode: CodegenMode::Aot,
+            jit_args: {
+                match std::env::var("CG_CLIF_JIT_ARGS") {
+                    Ok(args) => args.split(' ').map(|arg| arg.to_string()).collect(),
+                    Err(std::env::VarError::NotPresent) => vec![],
+                    Err(std::env::VarError::NotUnicode(s)) => {
+                        panic!("CG_CLIF_JIT_ARGS not unicode: {:?}", s);
+                    }
+                }
+            },
+            enable_verifier: cfg!(debug_assertions) || bool_env_var("CG_CLIF_ENABLE_VERIFIER"),
+            disable_incr_cache: bool_env_var("CG_CLIF_DISABLE_INCR_CACHE"),
+        }
+    }
+}
+
+impl BackendConfig {
+    /// Parse the configuration passed in using `-Cllvm-args`.
+    pub fn from_opts(opts: &[String]) -> Result<Self, String> {
+        fn parse_bool(name: &str, value: &str) -> Result<bool, String> {
+            value.parse().map_err(|_| format!("failed to parse value `{}` for {}", value, name))
+        }
+
+        let mut config = BackendConfig::default();
+        for opt in opts {
+            if opt.starts_with("-import-instr-limit") {
+                // Silently ignore -import-instr-limit. It is set by Rust's build system even when
+                // testing cg_clif.
+                continue;
+            }
+            if let Some((name, value)) = opt.split_once('=') {
+                match name {
+                    "mode" => config.codegen_mode = value.parse()?,
+                    "enable_verifier" => config.enable_verifier = parse_bool(name, value)?,
+                    "disable_incr_cache" => config.disable_incr_cache = parse_bool(name, value)?,
+                    _ => return Err(format!("Unknown option `{}`", name)),
+                }
+            } else {
+                return Err(format!("Invalid option `{}`", opt));
+            }
+        }
+
+        Ok(config)
+    }
+}
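+
+// E.g. `BackendConfig::from_opts(&["mode=jit".to_owned()])` yields a config with
+// `CodegenMode::Jit` and every other field at its (env var dependent) default; an
+// option without a `=` is rejected as invalid.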
diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs
new file mode 100644
index 00000000000..64e83e43d32
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/constant.rs
@@ -0,0 +1,574 @@
+//! Handling of `static`s, `const`s and promoted allocations
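+//!
+//! Allocations are first queued on the [`ConstantCx`] worklist; [`ConstantCx::finalize`]
+//! then defines them all as Cranelift data objects, following relocations to also define
+//! any allocations they reference.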
+
+use std::cmp::Ordering;
+
+use cranelift_module::*;
+use rustc_data_structures::fx::FxHashSet;
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
+use rustc_middle::mir::interpret::{read_target_uint, AllocId, GlobalAlloc, Scalar};
+use rustc_middle::ty::{Binder, ExistentialTraitRef, ScalarInt};
+
+use crate::prelude::*;
+
+pub(crate) struct ConstantCx {
+    todo: Vec<TodoItem>,
+    anon_allocs: FxHashMap<AllocId, DataId>,
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+enum TodoItem {
+    Alloc(AllocId),
+    Static(DefId),
+}
+
+impl ConstantCx {
+    pub(crate) fn new() -> Self {
+        ConstantCx { todo: vec![], anon_allocs: FxHashMap::default() }
+    }
+
+    pub(crate) fn finalize(mut self, tcx: TyCtxt<'_>, module: &mut dyn Module) {
+        define_all_allocs(tcx, module, &mut self);
+    }
+}
+
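+/// Define the data object for a static, including all allocations it transitively
+/// references, and return the `DataId` of its declaration.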
+pub(crate) fn codegen_static(tcx: TyCtxt<'_>, module: &mut dyn Module, def_id: DefId) -> DataId {
+    let mut constants_cx = ConstantCx::new();
+    constants_cx.todo.push(TodoItem::Static(def_id));
+    constants_cx.finalize(tcx, module);
+
+    data_id_for_static(
+        tcx, module, def_id, false,
+        // For a declaration the stated mutability doesn't matter.
+        false,
+    )
+}
+
+pub(crate) fn codegen_tls_ref<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    def_id: DefId,
+    layout: TyAndLayout<'tcx>,
+) -> CValue<'tcx> {
+    let tls_ptr = if !def_id.is_local() && fx.tcx.needs_thread_local_shim(def_id) {
+        let instance = ty::Instance {
+            def: ty::InstanceDef::ThreadLocalShim(def_id),
+            args: ty::GenericArgs::empty(),
+        };
+        let func_ref = fx.get_function_ref(instance);
+        let call = fx.bcx.ins().call(func_ref, &[]);
+        fx.bcx.func.dfg.first_result(call)
+    } else {
+        let data_id = data_id_for_static(
+            fx.tcx, fx.module, def_id, false,
+            // For a declaration the stated mutability doesn't matter.
+            false,
+        );
+        let local_data_id = fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
+        if fx.clif_comments.enabled() {
+            fx.add_comment(local_data_id, format!("tls {:?}", def_id));
+        }
+        fx.bcx.ins().tls_value(fx.pointer_type, local_data_id)
+    };
+    CValue::by_val(tls_ptr, layout)
+}
+
+pub(crate) fn eval_mir_constant<'tcx>(
+    fx: &FunctionCx<'_, '_, 'tcx>,
+    constant: &ConstOperand<'tcx>,
+) -> (ConstValue<'tcx>, Ty<'tcx>) {
+    let cv = fx.monomorphize(constant.const_);
+    // This cannot fail because we checked all required_consts in advance.
+    let val = cv
+        .eval(fx.tcx, ty::ParamEnv::reveal_all(), constant.span)
+        .expect("erroneous constant missed by mono item collection");
+    (val, cv.ty())
+}
+
+pub(crate) fn codegen_constant_operand<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    constant: &ConstOperand<'tcx>,
+) -> CValue<'tcx> {
+    let (const_val, ty) = eval_mir_constant(fx, constant);
+    codegen_const_value(fx, const_val, ty)
+}
+
+pub(crate) fn codegen_const_value<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    const_val: ConstValue<'tcx>,
+    ty: Ty<'tcx>,
+) -> CValue<'tcx> {
+    let layout = fx.layout_of(ty);
+    assert!(layout.is_sized(), "unsized const value");
+
+    if layout.is_zst() {
+        return CValue::by_ref(crate::Pointer::dangling(layout.align.pref), layout);
+    }
+
+    match const_val {
+        ConstValue::ZeroSized => unreachable!(), // we already handled ZST above
+        ConstValue::Scalar(x) => match x {
+            Scalar::Int(int) => {
+                if fx.clif_type(layout.ty).is_some() {
+                    return CValue::const_val(fx, layout, int);
+                } else {
+                    let raw_val = int.size().truncate(int.assert_bits(int.size()));
+                    let val = match int.size().bytes() {
+                        1 => fx.bcx.ins().iconst(types::I8, raw_val as i64),
+                        2 => fx.bcx.ins().iconst(types::I16, raw_val as i64),
+                        4 => fx.bcx.ins().iconst(types::I32, raw_val as i64),
+                        8 => fx.bcx.ins().iconst(types::I64, raw_val as i64),
+                        16 => {
+                            let lsb = fx.bcx.ins().iconst(types::I64, raw_val as u64 as i64);
+                            let msb =
+                                fx.bcx.ins().iconst(types::I64, (raw_val >> 64) as u64 as i64);
+                            fx.bcx.ins().iconcat(lsb, msb)
+                        }
+                        _ => unreachable!(),
+                    };
+
+                    // FIXME avoid this extra copy to the stack and directly write to the final
+                    // destination
+                    let place = CPlace::new_stack_slot(fx, layout);
+                    place.to_ptr().store(fx, val, MemFlags::trusted());
+                    place.to_cvalue(fx)
+                }
+            }
+            Scalar::Ptr(ptr, _size) => {
+                let (prov, offset) = ptr.into_parts(); // we know the `offset` is relative
+                let alloc_id = prov.alloc_id();
+                let base_addr = match fx.tcx.global_alloc(alloc_id) {
+                    GlobalAlloc::Memory(alloc) => {
+                        if alloc.inner().len() == 0 {
+                            assert_eq!(offset, Size::ZERO);
+                            fx.bcx.ins().iconst(fx.pointer_type, alloc.inner().align.bytes() as i64)
+                        } else {
+                            let data_id = data_id_for_alloc_id(
+                                &mut fx.constants_cx,
+                                fx.module,
+                                alloc_id,
+                                alloc.inner().mutability,
+                            );
+                            let local_data_id =
+                                fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
+                            if fx.clif_comments.enabled() {
+                                fx.add_comment(local_data_id, format!("{:?}", alloc_id));
+                            }
+                            fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
+                        }
+                    }
+                    GlobalAlloc::Function(instance) => {
+                        let func_id = crate::abi::import_function(fx.tcx, fx.module, instance);
+                        let local_func_id =
+                            fx.module.declare_func_in_func(func_id, &mut fx.bcx.func);
+                        fx.bcx.ins().func_addr(fx.pointer_type, local_func_id)
+                    }
+                    GlobalAlloc::VTable(ty, trait_ref) => {
+                        let data_id = data_id_for_vtable(
+                            fx.tcx,
+                            &mut fx.constants_cx,
+                            fx.module,
+                            ty,
+                            trait_ref,
+                        );
+                        let local_data_id =
+                            fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
+                        fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
+                    }
+                    GlobalAlloc::Static(def_id) => {
+                        assert!(fx.tcx.is_static(def_id));
+                        let data_id = data_id_for_static(
+                            fx.tcx, fx.module, def_id, false,
+                            // For a declaration the stated mutability doesn't matter.
+                            false,
+                        );
+                        let local_data_id =
+                            fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
+                        if fx.clif_comments.enabled() {
+                            fx.add_comment(local_data_id, format!("{:?}", def_id));
+                        }
+                        fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
+                    }
+                };
+                let val = if offset.bytes() != 0 {
+                    fx.bcx.ins().iadd_imm(base_addr, i64::try_from(offset.bytes()).unwrap())
+                } else {
+                    base_addr
+                };
+                CValue::by_val(val, layout)
+            }
+        },
+        ConstValue::Indirect { alloc_id, offset } => CValue::by_ref(
+            pointer_for_allocation(fx, alloc_id)
+                .offset_i64(fx, i64::try_from(offset.bytes()).unwrap()),
+            layout,
+        ),
+        ConstValue::Slice { data, meta } => {
+            let alloc_id = fx.tcx.reserve_and_set_memory_alloc(data);
+            let ptr = pointer_for_allocation(fx, alloc_id).get_addr(fx);
+            let len = fx.bcx.ins().iconst(fx.pointer_type, meta as i64);
+            CValue::by_val_pair(ptr, len, layout)
+        }
+    }
+}
+
+fn pointer_for_allocation<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    alloc_id: AllocId,
+) -> crate::pointer::Pointer {
+    let alloc = fx.tcx.global_alloc(alloc_id).unwrap_memory();
+    let data_id =
+        data_id_for_alloc_id(&mut fx.constants_cx, fx.module, alloc_id, alloc.inner().mutability);
+
+    let local_data_id = fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
+    if fx.clif_comments.enabled() {
+        fx.add_comment(local_data_id, format!("{:?}", alloc_id));
+    }
+    let global_ptr = fx.bcx.ins().global_value(fx.pointer_type, local_data_id);
+    crate::pointer::Pointer::new(global_ptr)
+}
+
+pub(crate) fn data_id_for_alloc_id(
+    cx: &mut ConstantCx,
+    module: &mut dyn Module,
+    alloc_id: AllocId,
+    mutability: rustc_hir::Mutability,
+) -> DataId {
+    cx.todo.push(TodoItem::Alloc(alloc_id));
+    *cx.anon_allocs
+        .entry(alloc_id)
+        .or_insert_with(|| module.declare_anonymous_data(mutability.is_mut(), false).unwrap())
+}
+
+pub(crate) fn data_id_for_vtable<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    cx: &mut ConstantCx,
+    module: &mut dyn Module,
+    ty: Ty<'tcx>,
+    trait_ref: Option<Binder<'tcx, ExistentialTraitRef<'tcx>>>,
+) -> DataId {
+    let alloc_id = tcx.vtable_allocation((ty, trait_ref));
+    data_id_for_alloc_id(cx, module, alloc_id, Mutability::Not)
+}
+
+fn data_id_for_static(
+    tcx: TyCtxt<'_>,
+    module: &mut dyn Module,
+    def_id: DefId,
+    definition: bool,
+    definition_writable: bool,
+) -> DataId {
+    let attrs = tcx.codegen_fn_attrs(def_id);
+
+    let instance = Instance::mono(tcx, def_id);
+    let symbol_name = tcx.symbol_name(instance).name;
+
+    if let Some(import_linkage) = attrs.import_linkage {
+        assert!(!definition);
+        assert!(!tcx.is_mutable_static(def_id));
+
+        let ty = instance.ty(tcx, ParamEnv::reveal_all());
+        let align = tcx.layout_of(ParamEnv::reveal_all().and(ty)).unwrap().align.pref.bytes();
+
+        let linkage = if import_linkage == rustc_middle::mir::mono::Linkage::ExternalWeak
+            || import_linkage == rustc_middle::mir::mono::Linkage::WeakAny
+        {
+            Linkage::Preemptible
+        } else {
+            Linkage::Import
+        };
+
+        let data_id = match module.declare_data(
+            symbol_name,
+            linkage,
+            false,
+            attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL),
+        ) {
+            Ok(data_id) => data_id,
+            Err(ModuleError::IncompatibleDeclaration(_)) => tcx.dcx().fatal(format!(
+                "attempt to declare `{symbol_name}` as static, but it was already declared as function"
+            )),
+            Err(err) => Err::<_, _>(err).unwrap(),
+        };
+
+        // Comment copied from https://github.com/rust-lang/rust/blob/45060c2a66dfd667f88bd8b94261b28a58d85bd5/src/librustc_codegen_llvm/consts.rs#L141
+        // Declare an internal global `extern_with_linkage_foo` which
+        // is initialized with the address of `foo`. If `foo` is
+        // discarded during linking (for example, if `foo` has weak
+        // linkage and there are no definitions), then
+        // `extern_with_linkage_foo` will instead be initialized to
+        // zero.
+
+        let ref_name = format!("_rust_extern_with_linkage_{}", symbol_name);
+        let ref_data_id = module.declare_data(&ref_name, Linkage::Local, false, false).unwrap();
+        let mut data = DataDescription::new();
+        data.set_align(align);
+        let data_gv = module.declare_data_in_data(data_id, &mut data);
+        data.define(std::iter::repeat(0).take(pointer_ty(tcx).bytes() as usize).collect());
+        data.write_data_addr(0, data_gv, 0);
+        match module.define_data(ref_data_id, &data) {
+            // Every time the static is referenced there will be another definition of this global,
+            // so duplicate definitions are expected and allowed.
+            Err(ModuleError::DuplicateDefinition(_)) => {}
+            res => res.unwrap(),
+        }
+
+        return ref_data_id;
+    }
+
+    let linkage = if definition {
+        crate::linkage::get_static_linkage(tcx, def_id)
+    } else if attrs.linkage == Some(rustc_middle::mir::mono::Linkage::ExternalWeak)
+        || attrs.linkage == Some(rustc_middle::mir::mono::Linkage::WeakAny)
+    {
+        Linkage::Preemptible
+    } else {
+        Linkage::Import
+    };
+
+    let data_id = match module.declare_data(
+        symbol_name,
+        linkage,
+        definition_writable,
+        attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL),
+    ) {
+        Ok(data_id) => data_id,
+        Err(ModuleError::IncompatibleDeclaration(_)) => tcx.dcx().fatal(format!(
+            "attempt to declare `{symbol_name}` as static, but it was already declared as function"
+        )),
+        Err(err) => Err::<_, _>(err).unwrap(),
+    };
+
+    data_id
+}
+
+fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut ConstantCx) {
+    let mut done = FxHashSet::default();
+    while let Some(todo_item) = cx.todo.pop() {
+        if !done.insert(todo_item) {
+            continue;
+        }
+
+        let (data_id, alloc, section_name) = match todo_item {
+            TodoItem::Alloc(alloc_id) => {
+                let alloc = match tcx.global_alloc(alloc_id) {
+                    GlobalAlloc::Memory(alloc) => alloc,
+                    GlobalAlloc::Function(_) | GlobalAlloc::Static(_) | GlobalAlloc::VTable(..) => {
+                        unreachable!()
+                    }
+                };
+                // FIXME: should we have a cache so we don't do this multiple times for the same `ConstAllocation`?
+                let data_id = *cx.anon_allocs.entry(alloc_id).or_insert_with(|| {
+                    module.declare_anonymous_data(alloc.inner().mutability.is_mut(), false).unwrap()
+                });
+                (data_id, alloc, None)
+            }
+            TodoItem::Static(def_id) => {
+                let section_name = tcx.codegen_fn_attrs(def_id).link_section;
+
+                let alloc = tcx.eval_static_initializer(def_id).unwrap();
+
+                let data_id = data_id_for_static(
+                    tcx,
+                    module,
+                    def_id,
+                    true,
+                    alloc.inner().mutability == Mutability::Mut,
+                );
+                (data_id, alloc, section_name)
+            }
+        };
+
+        let mut data = DataDescription::new();
+        let alloc = alloc.inner();
+        data.set_align(alloc.align.bytes());
+
+        if let Some(section_name) = section_name {
+            let (segment_name, section_name) = if tcx.sess.target.is_like_osx {
+                let section_name = section_name.as_str();
+                if let Some(names) = section_name.split_once(',') {
+                    names
+                } else {
+                    tcx.dcx().fatal(format!(
+                        "#[link_section = \"{}\"] is not valid for macos target: must be segment and section separated by comma",
+                        section_name
+                    ));
+                }
+            } else {
+                ("", section_name.as_str())
+            };
+            data.set_segment_section(segment_name, section_name);
+        }
+
+        let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
+        data.define(bytes.into_boxed_slice());
+
+        for &(offset, prov) in alloc.provenance().ptrs().iter() {
+            let alloc_id = prov.alloc_id();
+            let addend = {
+                let endianness = tcx.data_layout.endian;
+                let offset = offset.bytes() as usize;
+                let ptr_size = tcx.data_layout.pointer_size;
+                let bytes = &alloc.inspect_with_uninit_and_ptr_outside_interpreter(
+                    offset..offset + ptr_size.bytes() as usize,
+                );
+                read_target_uint(endianness, bytes).unwrap()
+            };
+
+            let reloc_target_alloc = tcx.global_alloc(alloc_id);
+            let data_id = match reloc_target_alloc {
+                GlobalAlloc::Function(instance) => {
+                    assert_eq!(addend, 0);
+                    let func_id =
+                        crate::abi::import_function(tcx, module, instance.polymorphize(tcx));
+                    let local_func_id = module.declare_func_in_data(func_id, &mut data);
+                    data.write_function_addr(offset.bytes() as u32, local_func_id);
+                    continue;
+                }
+                GlobalAlloc::Memory(target_alloc) => {
+                    data_id_for_alloc_id(cx, module, alloc_id, target_alloc.inner().mutability)
+                }
+                GlobalAlloc::VTable(ty, trait_ref) => {
+                    data_id_for_vtable(tcx, cx, module, ty, trait_ref)
+                }
+                GlobalAlloc::Static(def_id) => {
+                    if tcx.codegen_fn_attrs(def_id).flags.contains(CodegenFnAttrFlags::THREAD_LOCAL)
+                    {
+                        tcx.dcx().fatal(format!(
+                            "Allocation {:?} contains reference to TLS value {:?}",
+                            alloc_id, def_id
+                        ));
+                    }
+
+                    // Don't push a `TodoItem::Static` here, as it will cause statics used by
+                    // multiple crates to be duplicated between them. It isn't necessary anyway,
+                    // as it will get pushed by `codegen_static` when necessary.
+                    data_id_for_static(
+                        tcx, module, def_id, false,
+                        // For a declaration the stated mutability doesn't matter.
+                        false,
+                    )
+                }
+            };
+
+            let global_value = module.declare_data_in_data(data_id, &mut data);
+            data.write_data_addr(offset.bytes() as u32, global_value, addend as i64);
+        }
+
+        module.define_data(data_id, &data).unwrap();
+    }
+
+    assert!(cx.todo.is_empty(), "{:?}", cx.todo);
+}
+
+/// Used only for intrinsic implementations that need a compile-time constant
+pub(crate) fn mir_operand_get_const_val<'tcx>(
+    fx: &FunctionCx<'_, '_, 'tcx>,
+    operand: &Operand<'tcx>,
+) -> Option<ScalarInt> {
+    match operand {
+        Operand::Constant(const_) => eval_mir_constant(fx, const_).0.try_to_scalar_int(),
+        // FIXME(rust-lang/rust#85105): Casts like `IMM8 as u32` result in the const being stored
+        // inside a temporary before being passed to the intrinsic requiring the const argument.
+        // This code tries to find the single assignment that defines the referenced local as a constant.
+        Operand::Copy(place) | Operand::Move(place) => {
+            if !place.projection.is_empty() {
+                return None;
+            }
+            let mut computed_scalar_int = None;
+            for bb_data in fx.mir.basic_blocks.iter() {
+                for stmt in &bb_data.statements {
+                    match &stmt.kind {
+                        StatementKind::Assign(local_and_rvalue) if &local_and_rvalue.0 == place => {
+                            match &local_and_rvalue.1 {
+                                Rvalue::Cast(
+                                    CastKind::IntToInt
+                                    | CastKind::FloatToFloat
+                                    | CastKind::FloatToInt
+                                    | CastKind::IntToFloat
+                                    | CastKind::FnPtrToPtr
+                                    | CastKind::PtrToPtr,
+                                    operand,
+                                    ty,
+                                ) => {
+                                    if computed_scalar_int.is_some() {
+                                        return None; // local assigned twice
+                                    }
+                                    if !matches!(ty.kind(), ty::Uint(_) | ty::Int(_)) {
+                                        return None;
+                                    }
+                                    let scalar_int = mir_operand_get_const_val(fx, operand)?;
+                                    let scalar_int =
+                                        match fx.layout_of(*ty).size.cmp(&scalar_int.size()) {
+                                            Ordering::Equal => scalar_int,
+                                            Ordering::Less => match ty.kind() {
+                                                ty::Uint(_) => ScalarInt::try_from_uint(
+                                                    scalar_int.assert_uint(scalar_int.size()),
+                                                    fx.layout_of(*ty).size,
+                                                )
+                                                .unwrap(),
+                                                ty::Int(_) => ScalarInt::try_from_int(
+                                                    scalar_int.assert_int(scalar_int.size()),
+                                                    fx.layout_of(*ty).size,
+                                                )
+                                                .unwrap(),
+                                                _ => unreachable!(),
+                                            },
+                                            Ordering::Greater => return None,
+                                        };
+                                    computed_scalar_int = Some(scalar_int);
+                                }
+                                Rvalue::Use(operand) => {
+                                    computed_scalar_int = mir_operand_get_const_val(fx, operand)
+                                }
+                                _ => return None,
+                            }
+                        }
+                        StatementKind::SetDiscriminant { place: stmt_place, variant_index: _ }
+                            if &**stmt_place == place =>
+                        {
+                            return None;
+                        }
+                        StatementKind::Intrinsic(ref intrinsic) => match **intrinsic {
+                            NonDivergingIntrinsic::CopyNonOverlapping(..) => return None,
+                            NonDivergingIntrinsic::Assume(..) => {}
+                        },
+                        // conservative handling
+                        StatementKind::Assign(_)
+                        | StatementKind::FakeRead(_)
+                        | StatementKind::SetDiscriminant { .. }
+                        | StatementKind::Deinit(_)
+                        | StatementKind::StorageLive(_)
+                        | StatementKind::StorageDead(_)
+                        | StatementKind::Retag(_, _)
+                        | StatementKind::AscribeUserType(_, _)
+                        | StatementKind::PlaceMention(..)
+                        | StatementKind::Coverage(_)
+                        | StatementKind::ConstEvalCounter
+                        | StatementKind::Nop => {}
+                    }
+                }
+                match &bb_data.terminator().kind {
+                    TerminatorKind::Goto { .. }
+                    | TerminatorKind::SwitchInt { .. }
+                    | TerminatorKind::UnwindResume
+                    | TerminatorKind::UnwindTerminate(_)
+                    | TerminatorKind::Return
+                    | TerminatorKind::Unreachable
+                    | TerminatorKind::Drop { .. }
+                    | TerminatorKind::Assert { .. } => {}
+                    TerminatorKind::Yield { .. }
+                    | TerminatorKind::CoroutineDrop
+                    | TerminatorKind::FalseEdge { .. }
+                    | TerminatorKind::FalseUnwind { .. } => unreachable!(),
+                    TerminatorKind::InlineAsm { .. } => return None,
+                    TerminatorKind::Call { destination, target: Some(_), .. }
+                        if destination == place =>
+                    {
+                        return None;
+                    }
+                    TerminatorKind::Call { .. } => {}
+                }
+            }
+            computed_scalar_int
+        }
+    }
+}
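The `Ordering::Less` arm above widens the constant to the cast's target size, zero-extending for unsigned targets and sign-extending for signed ones, while a narrowing cast makes the helper give up. A minimal standalone sketch of that rule, with plain `u64` bit patterns standing in for `ScalarInt` (all names here are illustrative, not part of the patch):

// Widen a `from_bits`-wide constant to `to_bits`, zero-extending unsigned
// values and sign-extending signed ones; narrowing returns None, mirroring
// the `Ordering::Greater => return None` arm above.
fn widen(value: u64, from_bits: u32, to_bits: u32, signed: bool) -> Option<u64> {
    assert!(from_bits >= 1 && from_bits <= 64 && to_bits <= 64);
    if to_bits < from_bits {
        return None;
    }
    let mask = u64::MAX >> (64 - to_bits);
    if !signed {
        return Some(value & mask); // already zero-extended in the u64
    }
    let shift = 64 - from_bits;
    let extended = (((value << shift) as i64) >> shift) as u64; // sign-extend
    Some(extended & mask)
}

fn main() {
    assert_eq!(widen(0xFF, 8, 32, false), Some(0xFF)); // 255u8 as u32
    assert_eq!(widen(0xFF, 8, 32, true), Some(0xFFFF_FFFF)); // -1i8 as i32
    assert_eq!(widen(0x1234, 16, 8, false), None); // narrowing: bail out
}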
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs
new file mode 100644
index 00000000000..36af7d4450d
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs
@@ -0,0 +1,203 @@
+//! Write the debuginfo into an object file.
+
+use cranelift_module::{DataId, FuncId};
+use cranelift_object::ObjectProduct;
+use gimli::write::{Address, AttributeValue, EndianVec, Result, Sections, Writer};
+use gimli::{RunTimeEndian, SectionId};
+use rustc_data_structures::fx::FxHashMap;
+
+use super::object::WriteDebugInfo;
+use super::DebugContext;
+
+pub(super) fn address_for_func(func_id: FuncId) -> Address {
+    let symbol = func_id.as_u32();
+    assert!(symbol & 1 << 31 == 0);
+    Address::Symbol { symbol: symbol as usize, addend: 0 }
+}
+
+pub(super) fn address_for_data(data_id: DataId) -> Address {
+    let symbol = data_id.as_u32();
+    assert!(symbol & 1 << 31 == 0);
+    Address::Symbol { symbol: (symbol | 1 << 31) as usize, addend: 0 }
+}
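`address_for_func` and `address_for_data` multiplex two ID spaces into gimli's single `Address::Symbol { symbol: usize, .. }` field: function IDs keep bit 31 clear, data IDs get it set, and the decoding in `debuginfo/object.rs` undoes the tagging. A standalone round-trip sketch of the scheme (illustrative helper names):

// Function symbols keep bit 31 clear, data symbols set it, so a single
// usize can address both ID spaces without ambiguity.
fn encode_func(func_id: u32) -> usize {
    assert!(func_id & (1 << 31) == 0);
    func_id as usize
}

fn encode_data(data_id: u32) -> usize {
    assert!(data_id & (1 << 31) == 0);
    (data_id | 1 << 31) as usize
}

/// Returns (is_function, id).
fn decode(symbol: usize) -> (bool, u32) {
    let sym = u32::try_from(symbol).unwrap();
    if sym & (1 << 31) == 0 { (true, sym) } else { (false, sym & !(1 << 31)) }
}

fn main() {
    assert_eq!(decode(encode_func(7)), (true, 7));
    assert_eq!(decode(encode_data(7)), (false, 7));
}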
+
+impl DebugContext {
+    pub(crate) fn emit(&mut self, product: &mut ObjectProduct) {
+        let unit_range_list_id = self.dwarf.unit.ranges.add(self.unit_range_list.clone());
+        let root = self.dwarf.unit.root();
+        let root = self.dwarf.unit.get_mut(root);
+        root.set(gimli::DW_AT_ranges, AttributeValue::RangeListRef(unit_range_list_id));
+
+        let mut sections = Sections::new(WriterRelocate::new(self.endian));
+        self.dwarf.write(&mut sections).unwrap();
+
+        let mut section_map = FxHashMap::default();
+        let _: Result<()> = sections.for_each_mut(|id, section| {
+            if !section.writer.slice().is_empty() {
+                let section_id = product.add_debug_section(id, section.writer.take());
+                section_map.insert(id, section_id);
+            }
+            Ok(())
+        });
+
+        let _: Result<()> = sections.for_each(|id, section| {
+            if let Some(section_id) = section_map.get(&id) {
+                for reloc in &section.relocs {
+                    product.add_debug_reloc(&section_map, section_id, reloc);
+                }
+            }
+            Ok(())
+        });
+    }
+}
+
+#[derive(Clone)]
+pub(crate) struct DebugReloc {
+    pub(crate) offset: u32,
+    pub(crate) size: u8,
+    pub(crate) name: DebugRelocName,
+    pub(crate) addend: i64,
+    pub(crate) kind: object::RelocationKind,
+}
+
+#[derive(Clone)]
+pub(crate) enum DebugRelocName {
+    Section(SectionId),
+    Symbol(usize),
+}
+
+/// A [`Writer`] that collects all necessary relocations.
+#[derive(Clone)]
+pub(super) struct WriterRelocate {
+    pub(super) relocs: Vec<DebugReloc>,
+    pub(super) writer: EndianVec<RunTimeEndian>,
+}
+
+impl WriterRelocate {
+    pub(super) fn new(endian: RunTimeEndian) -> Self {
+        WriterRelocate { relocs: Vec::new(), writer: EndianVec::new(endian) }
+    }
+
+    /// Apply the collected relocations so the result is usable for JIT execution.
+    #[cfg(all(feature = "jit", not(windows)))]
+    pub(super) fn relocate_for_jit(mut self, jit_module: &cranelift_jit::JITModule) -> Vec<u8> {
+        for reloc in self.relocs.drain(..) {
+            match reloc.name {
+                super::DebugRelocName::Section(_) => unreachable!(),
+                super::DebugRelocName::Symbol(sym) => {
+                    let addr = jit_module.get_finalized_function(
+                        cranelift_module::FuncId::from_u32(sym.try_into().unwrap()),
+                    );
+                    let val = (addr as u64 as i64 + reloc.addend) as u64;
+                    self.writer.write_udata_at(reloc.offset as usize, val, reloc.size).unwrap();
+                }
+            }
+        }
+        self.writer.into_vec()
+    }
+}
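Under the JIT there is no linker to apply the recorded relocations, so `relocate_for_jit` patches each one directly into the byte buffer. A standalone sketch of a single absolute patch (little-endian byte order assumed for brevity; the helper name is illustrative):

// Write (symbol_address + addend) into `size` bytes at `offset`, which is
// what resolving one absolute DebugReloc amounts to.
fn apply_abs_reloc(buf: &mut [u8], offset: usize, size: u8, symbol_addr: u64, addend: i64) {
    let val = (symbol_addr as i64 + addend) as u64;
    buf[offset..offset + size as usize].copy_from_slice(&val.to_le_bytes()[..size as usize]);
}

fn main() {
    let mut buf = [0u8; 8];
    apply_abs_reloc(&mut buf, 0, 8, 0x1000, 8);
    assert_eq!(u64::from_le_bytes(buf), 0x1008);
}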
+
+impl Writer for WriterRelocate {
+    type Endian = RunTimeEndian;
+
+    fn endian(&self) -> Self::Endian {
+        self.writer.endian()
+    }
+
+    fn len(&self) -> usize {
+        self.writer.len()
+    }
+
+    fn write(&mut self, bytes: &[u8]) -> Result<()> {
+        self.writer.write(bytes)
+    }
+
+    fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> {
+        self.writer.write_at(offset, bytes)
+    }
+
+    fn write_address(&mut self, address: Address, size: u8) -> Result<()> {
+        match address {
+            Address::Constant(val) => self.write_udata(val, size),
+            Address::Symbol { symbol, addend } => {
+                let offset = self.len() as u64;
+                self.relocs.push(DebugReloc {
+                    offset: offset as u32,
+                    size,
+                    name: DebugRelocName::Symbol(symbol),
+                    addend,
+                    kind: object::RelocationKind::Absolute,
+                });
+                self.write_udata(0, size)
+            }
+        }
+    }
+
+    fn write_offset(&mut self, val: usize, section: SectionId, size: u8) -> Result<()> {
+        let offset = self.len() as u32;
+        self.relocs.push(DebugReloc {
+            offset,
+            size,
+            name: DebugRelocName::Section(section),
+            addend: val as i64,
+            kind: object::RelocationKind::Absolute,
+        });
+        self.write_udata(0, size)
+    }
+
+    fn write_offset_at(
+        &mut self,
+        offset: usize,
+        val: usize,
+        section: SectionId,
+        size: u8,
+    ) -> Result<()> {
+        self.relocs.push(DebugReloc {
+            offset: offset as u32,
+            size,
+            name: DebugRelocName::Section(section),
+            addend: val as i64,
+            kind: object::RelocationKind::Absolute,
+        });
+        self.write_udata_at(offset, 0, size)
+    }
+
+    fn write_eh_pointer(&mut self, address: Address, eh_pe: gimli::DwEhPe, size: u8) -> Result<()> {
+        match address {
+            // Address::Constant arm copied from gimli
+            Address::Constant(val) => {
+                // The DW_EH_PE_indirect flag doesn't matter here.
+                let val = match eh_pe.application() {
+                    gimli::DW_EH_PE_absptr => val,
+                    gimli::DW_EH_PE_pcrel => {
+                        // FIXME better handling of sign
+                        let offset = self.len() as u64;
+                        offset.wrapping_sub(val)
+                    }
+                    _ => {
+                        return Err(gimli::write::Error::UnsupportedPointerEncoding(eh_pe));
+                    }
+                };
+                self.write_eh_pointer_data(val, eh_pe.format(), size)
+            }
+            Address::Symbol { symbol, addend } => match eh_pe.application() {
+                gimli::DW_EH_PE_pcrel => {
+                    let size = match eh_pe.format() {
+                        gimli::DW_EH_PE_sdata4 => 4,
+                        gimli::DW_EH_PE_sdata8 => 8,
+                        _ => return Err(gimli::write::Error::UnsupportedPointerEncoding(eh_pe)),
+                    };
+                    self.relocs.push(DebugReloc {
+                        offset: self.len() as u32,
+                        size,
+                        name: DebugRelocName::Symbol(symbol),
+                        addend,
+                        kind: object::RelocationKind::Relative,
+                    });
+                    self.write_udata(0, size)
+                }
+                _ => Err(gimli::write::Error::UnsupportedPointerEncoding(eh_pe)),
+            },
+        }
+    }
+}
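For a `DW_EH_PE_pcrel` pointer the writer emits zeros plus a `Relative` relocation, and whoever resolves it later stores the signed distance from the slot to the target, i.e. `symbol + addend - place`. A standalone sketch of that resolution for the `sdata4` format (illustrative names, not the actual linker API):

// Resolve a DW_EH_PE_pcrel | DW_EH_PE_sdata4 pointer: the 4-byte slot at
// `place` receives the signed distance from the slot to the target.
fn resolve_pcrel_sdata4(symbol_addr: u64, addend: i64, place: u64) -> i32 {
    i32::try_from(symbol_addr as i64 + addend - place as i64).unwrap()
}

fn main() {
    // An FDE slot at offset 0x40 pointing at a function located at 0x1000:
    assert_eq!(resolve_pcrel_sdata4(0x1000, 0, 0x40), 0xFC0);
}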
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs
new file mode 100644
index 00000000000..78b3d5a58f4
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs
@@ -0,0 +1,172 @@
+//! Line info generation (`.debug_line`)
+
+use std::ffi::OsStr;
+use std::path::{Component, Path};
+
+use cranelift_codegen::binemit::CodeOffset;
+use cranelift_codegen::MachSrcLoc;
+use gimli::write::{AttributeValue, FileId, FileInfo, LineProgram, LineString, LineStringTable};
+use rustc_span::{
+    hygiene, FileName, Pos, SourceFile, SourceFileAndLine, SourceFileHash, SourceFileHashAlgorithm,
+};
+
+use crate::debuginfo::emit::address_for_func;
+use crate::debuginfo::FunctionDebugContext;
+use crate::prelude::*;
+
+// OPTIMIZATION: It is cheaper to do this in one pass than to call `.parent()` and `.file_name()` separately.
+fn split_path_dir_and_file(path: &Path) -> (&Path, &OsStr) {
+    let mut iter = path.components();
+    let file_name = match iter.next_back() {
+        Some(Component::Normal(p)) => p,
+        component => {
+            panic!(
+                "Path component {:?} of path {} is an invalid filename",
+                component,
+                path.display()
+            );
+        }
+    };
+    let parent = iter.as_path();
+    (parent, file_name)
+}
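For reference, a standalone usage example of the helper above (the function is duplicated here so the snippet compiles on its own):

use std::ffi::OsStr;
use std::path::{Component, Path};

fn split_path_dir_and_file(path: &Path) -> (&Path, &OsStr) {
    let mut iter = path.components();
    let file_name = match iter.next_back() {
        Some(Component::Normal(p)) => p,
        component => panic!("invalid filename component {:?}", component),
    };
    (iter.as_path(), file_name)
}

fn main() {
    let (dir, file) = split_path_dir_and_file(Path::new("src/debuginfo/line_info.rs"));
    assert_eq!(dir, Path::new("src/debuginfo"));
    assert_eq!(file, "line_info.rs");
}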
+
+// OPTIMIZATION: Avoid UTF-8 validation on UNIX.
+fn osstr_as_utf8_bytes(path: &OsStr) -> &[u8] {
+    #[cfg(unix)]
+    {
+        use std::os::unix::ffi::OsStrExt;
+        path.as_bytes()
+    }
+    #[cfg(not(unix))]
+    {
+        path.to_str().unwrap().as_bytes()
+    }
+}
+
+const MD5_LEN: usize = 16;
+
+fn make_file_info(hash: SourceFileHash) -> Option<FileInfo> {
+    if hash.kind == SourceFileHashAlgorithm::Md5 {
+        let mut buf = [0u8; MD5_LEN];
+        buf.copy_from_slice(hash.hash_bytes());
+        Some(FileInfo { timestamp: 0, size: 0, md5: buf })
+    } else {
+        None
+    }
+}
+
+impl DebugContext {
+    pub(crate) fn get_span_loc(
+        &mut self,
+        tcx: TyCtxt<'_>,
+        function_span: Span,
+        span: Span,
+    ) -> (FileId, u64, u64) {
+        // Match behavior of `FunctionCx::adjusted_span_and_dbg_scope`.
+        let span = hygiene::walk_chain_collapsed(span, function_span);
+        match tcx.sess.source_map().lookup_line(span.lo()) {
+            Ok(SourceFileAndLine { sf: file, line }) => {
+                let file_id = self.add_source_file(&file);
+                let line_pos = file.lines()[line];
+                let col = file.relative_position(span.lo()) - line_pos;
+
+                (file_id, u64::try_from(line).unwrap() + 1, u64::from(col.to_u32()) + 1)
+            }
+            Err(file) => (self.add_source_file(&file), 0, 0),
+        }
+    }
+
+    pub(crate) fn add_source_file(&mut self, source_file: &SourceFile) -> FileId {
+        let cache_key = (source_file.stable_id, source_file.src_hash);
+        *self.created_files.entry(cache_key).or_insert_with(|| {
+            let line_program: &mut LineProgram = &mut self.dwarf.unit.line_program;
+            let line_strings: &mut LineStringTable = &mut self.dwarf.line_strings;
+
+            match &source_file.name {
+                FileName::Real(path) => {
+                    let (dir_path, file_name) =
+                        split_path_dir_and_file(path.to_path(self.filename_display_preference));
+                    let dir_name = osstr_as_utf8_bytes(dir_path.as_os_str());
+                    let file_name = osstr_as_utf8_bytes(file_name);
+
+                    let dir_id = if !dir_name.is_empty() {
+                        let dir_name =
+                            LineString::new(dir_name, line_program.encoding(), line_strings);
+                        line_program.add_directory(dir_name)
+                    } else {
+                        line_program.default_directory()
+                    };
+                    let file_name =
+                        LineString::new(file_name, line_program.encoding(), line_strings);
+
+                    let info = make_file_info(source_file.src_hash);
+
+                    line_program.file_has_md5 &= info.is_some();
+                    line_program.add_file(file_name, dir_id, info)
+                }
+                filename => {
+                    let dir_id = line_program.default_directory();
+                    let dummy_file_name = LineString::new(
+                        filename.display(self.filename_display_preference).to_string().into_bytes(),
+                        line_program.encoding(),
+                        line_strings,
+                    );
+                    line_program.add_file(dummy_file_name, dir_id, None)
+                }
+            }
+        })
+    }
+}
+
+impl FunctionDebugContext {
+    pub(crate) fn add_dbg_loc(&mut self, file_id: FileId, line: u64, column: u64) -> SourceLoc {
+        let (index, _) = self.source_loc_set.insert_full((file_id, line, column));
+        SourceLoc::new(u32::try_from(index).unwrap())
+    }
+
+    pub(super) fn create_debug_lines(
+        &mut self,
+        debug_context: &mut DebugContext,
+        func_id: FuncId,
+        context: &Context,
+    ) -> CodeOffset {
+        let create_row_for_span =
+            |debug_context: &mut DebugContext, source_loc: (FileId, u64, u64)| {
+                let (file_id, line, col) = source_loc;
+
+                debug_context.dwarf.unit.line_program.row().file = file_id;
+                debug_context.dwarf.unit.line_program.row().line = line;
+                debug_context.dwarf.unit.line_program.row().column = col;
+                debug_context.dwarf.unit.line_program.generate_row();
+            };
+
+        debug_context.dwarf.unit.line_program.begin_sequence(Some(address_for_func(func_id)));
+
+        let mut func_end = 0;
+
+        let mcr = context.compiled_code().unwrap();
+        for &MachSrcLoc { start, end, loc } in mcr.buffer.get_srclocs_sorted() {
+            debug_context.dwarf.unit.line_program.row().address_offset = u64::from(start);
+            if !loc.is_default() {
+                let source_loc = self.source_loc_set[loc.bits() as usize];
+                create_row_for_span(debug_context, source_loc);
+            } else {
+                create_row_for_span(debug_context, self.function_source_loc);
+            }
+            func_end = end;
+        }
+
+        debug_context.dwarf.unit.line_program.end_sequence(u64::from(func_end));
+
+        let func_end = mcr.buffer.total_size();
+
+        assert_ne!(func_end, 0);
+
+        let entry = debug_context.dwarf.unit.get_mut(self.entry_id);
+        entry.set(gimli::DW_AT_low_pc, AttributeValue::Address(address_for_func(func_id)));
+        entry.set(gimli::DW_AT_high_pc, AttributeValue::Udata(u64::from(func_end)));
+
+        func_end
+    }
+}
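`create_debug_lines` walks the compiled code's sorted `MachSrcLoc` ranges and emits one line-program row per range, falling back to the function-level source location for compiler-generated code without a real span. A toy model of that mapping, with plain tuples standing in for gimli's `LineProgram` (illustrative types):

// (start, end, loc): a code-offset range plus Some((line, col)) for real
// spans, or None for compiler-generated code that uses the fallback.
fn rows(
    srclocs: &[(u32, u32, Option<(u64, u64)>)],
    fallback: (u64, u64),
) -> Vec<(u64, (u64, u64))> {
    srclocs
        .iter()
        .map(|&(start, _end, loc)| (u64::from(start), loc.unwrap_or(fallback)))
        .collect()
}

fn main() {
    let table = rows(&[(0, 4, Some((10, 5))), (4, 8, None)], (1, 1));
    assert_eq!(table, vec![(0, (10, 5)), (4, (1, 1))]);
}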
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
new file mode 100644
index 00000000000..f0b78e5d7c6
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
@@ -0,0 +1,337 @@
+//! Handling of everything related to debuginfo.
+
+mod emit;
+mod line_info;
+mod object;
+mod types;
+mod unwind;
+
+use cranelift_codegen::ir::Endianness;
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_module::DataId;
+use gimli::write::{
+    Address, AttributeValue, DwarfUnit, Expression, FileId, LineProgram, LineString, Range,
+    RangeList, UnitEntryId,
+};
+use gimli::{AArch64, Encoding, Format, LineEncoding, Register, RiscV, RunTimeEndian, X86_64};
+use indexmap::IndexSet;
+use rustc_codegen_ssa::debuginfo::type_names;
+use rustc_hir::def::DefKind;
+use rustc_hir::def_id::DefIdMap;
+use rustc_session::Session;
+use rustc_span::{FileNameDisplayPreference, SourceFileHash, StableSourceFileId};
+use rustc_target::abi::call::FnAbi;
+
+pub(crate) use self::emit::{DebugReloc, DebugRelocName};
+pub(crate) use self::types::TypeDebugContext;
+pub(crate) use self::unwind::UnwindContext;
+use crate::debuginfo::emit::{address_for_data, address_for_func};
+use crate::prelude::*;
+
+pub(crate) fn producer(sess: &Session) -> String {
+    format!("rustc version {} with cranelift {}", sess.cfg_version, cranelift_codegen::VERSION)
+}
+
+pub(crate) struct DebugContext {
+    endian: RunTimeEndian,
+
+    dwarf: DwarfUnit,
+    unit_range_list: RangeList,
+    created_files: FxHashMap<(StableSourceFileId, SourceFileHash), FileId>,
+    stack_pointer_register: Register,
+    namespace_map: DefIdMap<UnitEntryId>,
+    array_size_type: UnitEntryId,
+
+    filename_display_preference: FileNameDisplayPreference,
+}
+
+pub(crate) struct FunctionDebugContext {
+    entry_id: UnitEntryId,
+    function_source_loc: (FileId, u64, u64),
+    source_loc_set: IndexSet<(FileId, u64, u64)>,
+}
+
+impl DebugContext {
+    pub(crate) fn new(tcx: TyCtxt<'_>, isa: &dyn TargetIsa, cgu_name: &str) -> Self {
+        let encoding = Encoding {
+            format: Format::Dwarf32,
+            // FIXME this should be configurable
+            // macOS doesn't seem to support DWARF > 3
+            // DWARF version 5 is required for the md5 file hash
+            version: if tcx.sess.target.is_like_osx {
+                3
+            } else {
+                // FIXME change to version 5 once the gdb and lldb shipping with the latest debian
+                // support it.
+                4
+            },
+            address_size: isa.frontend_config().pointer_bytes(),
+        };
+
+        let endian = match isa.endianness() {
+            Endianness::Little => RunTimeEndian::Little,
+            Endianness::Big => RunTimeEndian::Big,
+        };
+
+        let stack_pointer_register = match isa.triple().architecture {
+            target_lexicon::Architecture::Aarch64(_) => AArch64::SP,
+            target_lexicon::Architecture::Riscv64(_) => RiscV::SP,
+            target_lexicon::Architecture::X86_64 | target_lexicon::Architecture::X86_64h => {
+                X86_64::RSP
+            }
+            _ => Register(u16::MAX),
+        };
+
+        let mut dwarf = DwarfUnit::new(encoding);
+
+        use rustc_session::config::RemapPathScopeComponents;
+
+        let filename_display_preference =
+            tcx.sess.filename_display_preference(RemapPathScopeComponents::DEBUGINFO);
+
+        let producer = producer(tcx.sess);
+        let comp_dir =
+            tcx.sess.opts.working_dir.to_string_lossy(filename_display_preference).to_string();
+
+        let (name, file_info) = match tcx.sess.local_crate_source_file() {
+            Some(path) => {
+                let name = path.to_string_lossy(filename_display_preference).to_string();
+                (name, None)
+            }
+            None => (tcx.crate_name(LOCAL_CRATE).to_string(), None),
+        };
+
+        let mut line_program = LineProgram::new(
+            encoding,
+            LineEncoding::default(),
+            LineString::new(comp_dir.as_bytes(), encoding, &mut dwarf.line_strings),
+            LineString::new(name.as_bytes(), encoding, &mut dwarf.line_strings),
+            file_info,
+        );
+        line_program.file_has_md5 = file_info.is_some();
+
+        dwarf.unit.line_program = line_program;
+
+        {
+            let name = dwarf.strings.add(format!("{name}/@/{cgu_name}"));
+            let comp_dir = dwarf.strings.add(comp_dir);
+
+            let root = dwarf.unit.root();
+            let root = dwarf.unit.get_mut(root);
+            root.set(gimli::DW_AT_producer, AttributeValue::StringRef(dwarf.strings.add(producer)));
+            root.set(gimli::DW_AT_language, AttributeValue::Language(gimli::DW_LANG_Rust));
+            root.set(gimli::DW_AT_name, AttributeValue::StringRef(name));
+
+            // This will be replaced when emitting the debuginfo. It is only
+            // defined here to ensure that the order of the attributes matches
+            // rustc.
+            root.set(gimli::DW_AT_stmt_list, AttributeValue::Udata(0));
+
+            root.set(gimli::DW_AT_comp_dir, AttributeValue::StringRef(comp_dir));
+            root.set(gimli::DW_AT_low_pc, AttributeValue::Address(Address::Constant(0)));
+        }
+
+        let array_size_type = dwarf.unit.add(dwarf.unit.root(), gimli::DW_TAG_base_type);
+        let array_size_type_entry = dwarf.unit.get_mut(array_size_type);
+        array_size_type_entry.set(
+            gimli::DW_AT_name,
+            AttributeValue::StringRef(dwarf.strings.add("__ARRAY_SIZE_TYPE__")),
+        );
+        array_size_type_entry
+            .set(gimli::DW_AT_encoding, AttributeValue::Encoding(gimli::DW_ATE_unsigned));
+        array_size_type_entry.set(
+            gimli::DW_AT_byte_size,
+            AttributeValue::Udata(isa.frontend_config().pointer_bytes().into()),
+        );
+
+        DebugContext {
+            endian,
+            dwarf,
+            unit_range_list: RangeList(Vec::new()),
+            created_files: FxHashMap::default(),
+            stack_pointer_register,
+            namespace_map: DefIdMap::default(),
+            array_size_type,
+            filename_display_preference,
+        }
+    }
+
+    fn item_namespace(&mut self, tcx: TyCtxt<'_>, def_id: DefId) -> UnitEntryId {
+        if let Some(&scope) = self.namespace_map.get(&def_id) {
+            return scope;
+        }
+
+        let def_key = tcx.def_key(def_id);
+        let parent_scope = def_key
+            .parent
+            .map(|parent| self.item_namespace(tcx, DefId { krate: def_id.krate, index: parent }))
+            .unwrap_or(self.dwarf.unit.root());
+
+        let namespace_name = {
+            let mut output = String::new();
+            type_names::push_item_name(tcx, def_id, false, &mut output);
+            output
+        };
+        let namespace_name_id = self.dwarf.strings.add(namespace_name);
+
+        let scope = self.dwarf.unit.add(parent_scope, gimli::DW_TAG_namespace);
+        let scope_entry = self.dwarf.unit.get_mut(scope);
+        scope_entry.set(gimli::DW_AT_name, AttributeValue::StringRef(namespace_name_id));
+
+        self.namespace_map.insert(def_id, scope);
+        scope
+    }
+
+    pub(crate) fn define_function<'tcx>(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        type_dbg: &mut TypeDebugContext<'tcx>,
+        instance: Instance<'tcx>,
+        fn_abi: &'tcx FnAbi<'tcx, Ty<'tcx>>,
+        linkage_name: &str,
+        function_span: Span,
+    ) -> FunctionDebugContext {
+        let (file_id, line, column) = self.get_span_loc(tcx, function_span, function_span);
+
+        let scope = self.item_namespace(tcx, tcx.parent(instance.def_id()));
+
+        let mut name = String::new();
+        type_names::push_item_name(tcx, instance.def_id(), false, &mut name);
+
+        // Find the enclosing function, in case this is a closure.
+        let enclosing_fn_def_id = tcx.typeck_root_def_id(instance.def_id());
+
+        // We look up the generics of the enclosing function and truncate the args
+        // to their length in order to cut off extra stuff that might be in there for
+        // closures or coroutines.
+        let generics = tcx.generics_of(enclosing_fn_def_id);
+        let args = instance.args.truncate_to(tcx, generics);
+
+        type_names::push_generic_params(
+            tcx,
+            tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), args),
+            enclosing_fn_def_id,
+            &mut name,
+        );
+
+        let entry_id = self.dwarf.unit.add(scope, gimli::DW_TAG_subprogram);
+        let entry = self.dwarf.unit.get_mut(entry_id);
+        let linkage_name_id =
+            if name != linkage_name { Some(self.dwarf.strings.add(linkage_name)) } else { None };
+        let name_id = self.dwarf.strings.add(name);
+
+        // These will be replaced in FunctionDebugContext::finalize. They are
+        // only defined here to ensure that the order of the attributes matches
+        // rustc.
+        entry.set(gimli::DW_AT_low_pc, AttributeValue::Udata(0));
+        entry.set(gimli::DW_AT_high_pc, AttributeValue::Udata(0));
+
+        let mut frame_base_expr = Expression::new();
+        frame_base_expr.op_reg(self.stack_pointer_register);
+        entry.set(gimli::DW_AT_frame_base, AttributeValue::Exprloc(frame_base_expr));
+
+        if let Some(linkage_name_id) = linkage_name_id {
+            entry.set(gimli::DW_AT_linkage_name, AttributeValue::StringRef(linkage_name_id));
+        }
+        // Gdb requires DW_AT_name. Otherwise the DW_TAG_subprogram is skipped.
+        entry.set(gimli::DW_AT_name, AttributeValue::StringRef(name_id));
+
+        entry.set(gimli::DW_AT_decl_file, AttributeValue::FileIndex(Some(file_id)));
+        entry.set(gimli::DW_AT_decl_line, AttributeValue::Udata(line));
+
+        if !fn_abi.ret.is_ignore() {
+            let return_dw_ty = self.debug_type(tcx, type_dbg, fn_abi.ret.layout.ty);
+            let entry = self.dwarf.unit.get_mut(entry_id);
+            entry.set(gimli::DW_AT_type, AttributeValue::UnitRef(return_dw_ty));
+        }
+
+        if tcx.is_reachable_non_generic(instance.def_id()) {
+            let entry = self.dwarf.unit.get_mut(entry_id);
+            entry.set(gimli::DW_AT_external, AttributeValue::FlagPresent);
+        }
+
+        FunctionDebugContext {
+            entry_id,
+            function_source_loc: (file_id, line, column),
+            source_loc_set: IndexSet::new(),
+        }
+    }
+
+    // Adapted from https://github.com/rust-lang/rust/blob/10a7aa14fed9b528b74b0f098c4899c37c09a9c7/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs#L1288-L1346
+    pub(crate) fn define_static<'tcx>(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        type_dbg: &mut TypeDebugContext<'tcx>,
+        def_id: DefId,
+        data_id: DataId,
+    ) {
+        let DefKind::Static { nested, .. } = tcx.def_kind(def_id) else { bug!() };
+        if nested {
+            return;
+        }
+
+        let scope = self.item_namespace(tcx, tcx.parent(def_id));
+
+        let span = tcx.def_span(def_id);
+        let (file_id, line, _column) = self.get_span_loc(tcx, span, span);
+
+        let static_type = Instance::mono(tcx, def_id).ty(tcx, ty::ParamEnv::reveal_all());
+        let static_layout = tcx.layout_of(ty::ParamEnv::reveal_all().and(static_type)).unwrap();
+        // FIXME use the actual type layout
+        let type_id = self.debug_type(tcx, type_dbg, static_type);
+
+        let name = tcx.item_name(def_id);
+        let linkage_name = tcx.symbol_name(Instance::mono(tcx, def_id)).name;
+
+        let entry_id = self.dwarf.unit.add(scope, gimli::DW_TAG_variable);
+        let entry = self.dwarf.unit.get_mut(entry_id);
+        let linkage_name_id = if name.as_str() != linkage_name {
+            Some(self.dwarf.strings.add(linkage_name))
+        } else {
+            None
+        };
+        let name_id = self.dwarf.strings.add(name.as_str());
+
+        entry.set(gimli::DW_AT_name, AttributeValue::StringRef(name_id));
+        entry.set(gimli::DW_AT_type, AttributeValue::UnitRef(type_id));
+
+        if tcx.is_reachable_non_generic(def_id) {
+            entry.set(gimli::DW_AT_external, AttributeValue::FlagPresent);
+        }
+
+        entry.set(gimli::DW_AT_decl_file, AttributeValue::FileIndex(Some(file_id)));
+        entry.set(gimli::DW_AT_decl_line, AttributeValue::Udata(line));
+
+        entry.set(gimli::DW_AT_alignment, AttributeValue::Udata(static_layout.align.pref.bytes()));
+
+        let mut expr = Expression::new();
+        expr.op_addr(address_for_data(data_id));
+        entry.set(gimli::DW_AT_location, AttributeValue::Exprloc(expr));
+
+        if let Some(linkage_name_id) = linkage_name_id {
+            entry.set(gimli::DW_AT_linkage_name, AttributeValue::StringRef(linkage_name_id));
+        }
+    }
+}
+
+impl FunctionDebugContext {
+    pub(crate) fn finalize(
+        mut self,
+        debug_context: &mut DebugContext,
+        func_id: FuncId,
+        context: &Context,
+    ) {
+        let end = self.create_debug_lines(debug_context, func_id, context);
+
+        debug_context
+            .unit_range_list
+            .0
+            .push(Range::StartLength { begin: address_for_func(func_id), length: u64::from(end) });
+
+        let func_entry = debug_context.dwarf.unit.get_mut(self.entry_id);
+        // Gdb requires both DW_AT_low_pc and DW_AT_high_pc. Otherwise the DW_TAG_subprogram is skipped.
+        func_entry.set(gimli::DW_AT_low_pc, AttributeValue::Address(address_for_func(func_id)));
+        // Using Udata for DW_AT_high_pc requires at least DWARF4
+        func_entry.set(gimli::DW_AT_high_pc, AttributeValue::Udata(u64::from(end)));
+    }
+}
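`item_namespace` builds the `DW_TAG_namespace` chain lazily: parents are created first via recursion, and every entry is memoized in `namespace_map` so each definition gets exactly one DWARF entry. A standalone sketch of the same memoized parent-chain pattern on toy types (a `usize` stands in for `DefId`, a `String` for the DWARF entry):

use std::collections::HashMap;

// `parents` maps child -> parent; `cache` plays the role of `namespace_map`.
fn namespace_of(
    def: usize,
    parents: &HashMap<usize, usize>,
    names: &HashMap<usize, &str>,
    cache: &mut HashMap<usize, String>,
) -> String {
    if let Some(scope) = cache.get(&def) {
        return scope.clone();
    }
    let scope = match parents.get(&def) {
        // Create the parent's entry first, then append this item's name.
        Some(&parent) => {
            format!("{}::{}", namespace_of(parent, parents, names, cache), names[&def])
        }
        None => names[&def].to_string(),
    };
    cache.insert(def, scope.clone());
    scope
}

fn main() {
    let parents = HashMap::from([(2, 1), (3, 2)]);
    let names = HashMap::from([(1, "my_crate"), (2, "inner"), (3, "Type")]);
    let mut cache = HashMap::new();
    assert_eq!(namespace_of(3, &parents, &names, &mut cache), "my_crate::inner::Type");
    assert_eq!(cache.len(), 3); // every level was memoized on the way up
}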
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs
new file mode 100644
index 00000000000..65f4c67b21f
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs
@@ -0,0 +1,85 @@
+use cranelift_module::{DataId, FuncId};
+use cranelift_object::ObjectProduct;
+use gimli::SectionId;
+use object::write::{Relocation, StandardSegment};
+use object::{RelocationEncoding, RelocationFlags, SectionKind};
+use rustc_data_structures::fx::FxHashMap;
+
+use crate::debuginfo::{DebugReloc, DebugRelocName};
+
+pub(super) trait WriteDebugInfo {
+    type SectionId: Copy;
+
+    fn add_debug_section(&mut self, name: SectionId, data: Vec<u8>) -> Self::SectionId;
+    fn add_debug_reloc(
+        &mut self,
+        section_map: &FxHashMap<SectionId, Self::SectionId>,
+        from: &Self::SectionId,
+        reloc: &DebugReloc,
+    );
+}
+
+impl WriteDebugInfo for ObjectProduct {
+    type SectionId = (object::write::SectionId, object::write::SymbolId);
+
+    fn add_debug_section(
+        &mut self,
+        id: SectionId,
+        data: Vec<u8>,
+    ) -> (object::write::SectionId, object::write::SymbolId) {
+        let name = if self.object.format() == object::BinaryFormat::MachO {
+            id.name().replace('.', "__") // Mach-O expects __debug_info instead of .debug_info
+        } else {
+            id.name().to_string()
+        }
+        .into_bytes();
+
+        let segment = self.object.segment_name(StandardSegment::Debug).to_vec();
+        // FIXME use SHT_X86_64_UNWIND for .eh_frame
+        let section_id = self.object.add_section(
+            segment,
+            name,
+            if id == SectionId::EhFrame { SectionKind::ReadOnlyData } else { SectionKind::Debug },
+        );
+        self.object
+            .section_mut(section_id)
+            .set_data(data, if id == SectionId::EhFrame { 8 } else { 1 });
+        let symbol_id = self.object.section_symbol(section_id);
+        (section_id, symbol_id)
+    }
+
+    fn add_debug_reloc(
+        &mut self,
+        section_map: &FxHashMap<SectionId, Self::SectionId>,
+        from: &Self::SectionId,
+        reloc: &DebugReloc,
+    ) {
+        let (symbol, symbol_offset) = match reloc.name {
+            DebugRelocName::Section(id) => (section_map.get(&id).unwrap().1, 0),
+            DebugRelocName::Symbol(id) => {
+                let id = id.try_into().unwrap();
+                let symbol_id = if id & 1 << 31 == 0 {
+                    self.function_symbol(FuncId::from_u32(id))
+                } else {
+                    self.data_symbol(DataId::from_u32(id & !(1 << 31)))
+                };
+                self.object.symbol_section_and_offset(symbol_id).unwrap_or((symbol_id, 0))
+            }
+        };
+        self.object
+            .add_relocation(
+                from.0,
+                Relocation {
+                    offset: u64::from(reloc.offset),
+                    symbol,
+                    flags: RelocationFlags::Generic {
+                        kind: reloc.kind,
+                        encoding: RelocationEncoding::Generic,
+                        size: reloc.size * 8,
+                    },
+                    addend: i64::try_from(symbol_offset).unwrap() + reloc.addend,
+                },
+            )
+            .unwrap();
+    }
+}
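Two details of the impl above, as standalone sketches: the Mach-O section-name translation, and the folding of a symbol's section offset into the relocation addend when the symbol resolves to a (section, offset) pair (helper names are illustrative):

fn macho_section_name(elf_name: &str) -> String {
    elf_name.replace('.', "__") // ".debug_info" -> "__debug_info"
}

fn fold_offset_into_addend(symbol_offset: u64, addend: i64) -> i64 {
    i64::try_from(symbol_offset).unwrap() + addend
}

fn main() {
    assert_eq!(macho_section_name(".debug_line"), "__debug_line");
    assert_eq!(fold_offset_into_addend(0x20, -4), 0x1C);
}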
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/types.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/types.rs
new file mode 100644
index 00000000000..7baf0a3868d
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/types.rs
@@ -0,0 +1,204 @@
+// Adapted from https://github.com/rust-lang/rust/blob/10a7aa14fed9b528b74b0f098c4899c37c09a9c7/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+
+use gimli::write::{AttributeValue, UnitEntryId};
+use rustc_codegen_ssa::debuginfo::type_names;
+use rustc_data_structures::fx::FxHashMap;
+use rustc_middle::ty::layout::LayoutOf;
+use rustc_middle::ty::{self, Ty, TyCtxt};
+
+use crate::{has_ptr_meta, DebugContext, RevealAllLayoutCx};
+
+#[derive(Default)]
+pub(crate) struct TypeDebugContext<'tcx> {
+    type_map: FxHashMap<Ty<'tcx>, UnitEntryId>,
+}
+
+/// Returns from the enclosing function if a debuginfo node for the given type
+/// has been created in the meantime and can be found in the type map.
+macro_rules! return_if_type_created_in_meantime {
+    ($type_dbg:expr, $ty:expr) => {
+        if let Some(&type_id) = $type_dbg.type_map.get(&$ty) {
+            return type_id;
+        }
+    };
+}
+
+impl DebugContext {
+    pub(crate) fn debug_type<'tcx>(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        type_dbg: &mut TypeDebugContext<'tcx>,
+        ty: Ty<'tcx>,
+    ) -> UnitEntryId {
+        if let Some(&type_id) = type_dbg.type_map.get(&ty) {
+            return type_id;
+        }
+
+        let type_id = match ty.kind() {
+            ty::Never | ty::Bool | ty::Char | ty::Int(_) | ty::Uint(_) | ty::Float(_) => {
+                self.basic_type(tcx, ty)
+            }
+            ty::Tuple(elems) if elems.is_empty() => self.basic_type(tcx, ty),
+            ty::Array(elem_ty, len) => self.array_type(
+                tcx,
+                type_dbg,
+                ty,
+                *elem_ty,
+                len.eval_target_usize(tcx, ty::ParamEnv::reveal_all()),
+            ),
+            // ty::Slice(_) | ty::Str
+            // ty::Dynamic
+            // ty::Foreign
+            ty::RawPtr(pointee_type, _) | ty::Ref(_, pointee_type, _) => {
+                self.pointer_type(tcx, type_dbg, ty, *pointee_type)
+            }
+            // ty::Adt(def, args) if def.is_box() && args.get(1).map_or(true, |arg| cx.layout_of(arg.expect_ty()).is_1zst())
+            // ty::FnDef(..) | ty::FnPtr(..)
+            // ty::Closure(..)
+            // ty::Adt(def, ..)
+            ty::Tuple(components) => self.tuple_type(tcx, type_dbg, ty, *components),
+            // ty::Param(_)
+            // FIXME implement remaining types and add unreachable!() to the fallback branch
+            _ => self.placeholder_for_type(tcx, type_dbg, ty),
+        };
+
+        type_dbg.type_map.insert(ty, type_id);
+
+        type_id
+    }
+
+    fn basic_type<'tcx>(&mut self, tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> UnitEntryId {
+        let (name, encoding) = match ty.kind() {
+            ty::Never => ("!", gimli::DW_ATE_unsigned),
+            ty::Tuple(elems) if elems.is_empty() => ("()", gimli::DW_ATE_unsigned),
+            ty::Bool => ("bool", gimli::DW_ATE_boolean),
+            ty::Char => ("char", gimli::DW_ATE_UTF),
+            ty::Int(int_ty) => (int_ty.name_str(), gimli::DW_ATE_signed),
+            ty::Uint(uint_ty) => (uint_ty.name_str(), gimli::DW_ATE_unsigned),
+            ty::Float(float_ty) => (float_ty.name_str(), gimli::DW_ATE_float),
+            _ => unreachable!(),
+        };
+
+        let type_id = self.dwarf.unit.add(self.dwarf.unit.root(), gimli::DW_TAG_base_type);
+        let type_entry = self.dwarf.unit.get_mut(type_id);
+        type_entry.set(gimli::DW_AT_name, AttributeValue::StringRef(self.dwarf.strings.add(name)));
+        type_entry.set(gimli::DW_AT_encoding, AttributeValue::Encoding(encoding));
+        type_entry.set(
+            gimli::DW_AT_byte_size,
+            AttributeValue::Udata(RevealAllLayoutCx(tcx).layout_of(ty).size.bytes()),
+        );
+
+        type_id
+    }
+
+    fn array_type<'tcx>(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        type_dbg: &mut TypeDebugContext<'tcx>,
+        array_ty: Ty<'tcx>,
+        elem_ty: Ty<'tcx>,
+        len: u64,
+    ) -> UnitEntryId {
+        let elem_dw_ty = self.debug_type(tcx, type_dbg, elem_ty);
+
+        return_if_type_created_in_meantime!(type_dbg, array_ty);
+
+        let array_type_id = self.dwarf.unit.add(self.dwarf.unit.root(), gimli::DW_TAG_array_type);
+        let array_type_entry = self.dwarf.unit.get_mut(array_type_id);
+        array_type_entry.set(gimli::DW_AT_type, AttributeValue::UnitRef(elem_dw_ty));
+
+        let subrange_id = self.dwarf.unit.add(array_type_id, gimli::DW_TAG_subrange_type);
+        let subrange_entry = self.dwarf.unit.get_mut(subrange_id);
+        subrange_entry.set(gimli::DW_AT_type, AttributeValue::UnitRef(self.array_size_type));
+        subrange_entry.set(gimli::DW_AT_lower_bound, AttributeValue::Udata(0));
+        subrange_entry.set(gimli::DW_AT_count, AttributeValue::Udata(len));
+
+        array_type_id
+    }
+
+    fn pointer_type<'tcx>(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        type_dbg: &mut TypeDebugContext<'tcx>,
+        ptr_type: Ty<'tcx>,
+        pointee_type: Ty<'tcx>,
+    ) -> UnitEntryId {
+        let pointee_dw_ty = self.debug_type(tcx, type_dbg, pointee_type);
+
+        return_if_type_created_in_meantime!(type_dbg, ptr_type);
+
+        let name = type_names::compute_debuginfo_type_name(tcx, ptr_type, true);
+
+        if !has_ptr_meta(tcx, ptr_type) {
+            let pointer_type_id =
+                self.dwarf.unit.add(self.dwarf.unit.root(), gimli::DW_TAG_pointer_type);
+            let pointer_entry = self.dwarf.unit.get_mut(pointer_type_id);
+            pointer_entry.set(gimli::DW_AT_type, AttributeValue::UnitRef(pointee_dw_ty));
+            pointer_entry
+                .set(gimli::DW_AT_name, AttributeValue::StringRef(self.dwarf.strings.add(name)));
+
+            pointer_type_id
+        } else {
+            // FIXME implement debuginfo for fat pointers
+            self.placeholder_for_type(tcx, type_dbg, ptr_type)
+        }
+    }
+
+    fn tuple_type<'tcx>(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        type_dbg: &mut TypeDebugContext<'tcx>,
+        tuple_type: Ty<'tcx>,
+        components: &'tcx [Ty<'tcx>],
+    ) -> UnitEntryId {
+        let components = components
+            .iter()
+            .map(|&ty| (ty, self.debug_type(tcx, type_dbg, ty)))
+            .collect::<Vec<_>>();
+
+        return_if_type_created_in_meantime!(type_dbg, tuple_type);
+
+        let name = type_names::compute_debuginfo_type_name(tcx, tuple_type, false);
+        let layout = RevealAllLayoutCx(tcx).layout_of(tuple_type);
+
+        let tuple_type_id =
+            self.dwarf.unit.add(self.dwarf.unit.root(), gimli::DW_TAG_structure_type);
+        let tuple_entry = self.dwarf.unit.get_mut(tuple_type_id);
+        tuple_entry.set(gimli::DW_AT_name, AttributeValue::StringRef(self.dwarf.strings.add(name)));
+        tuple_entry.set(gimli::DW_AT_byte_size, AttributeValue::Udata(layout.size.bytes()));
+        tuple_entry.set(gimli::DW_AT_alignment, AttributeValue::Udata(layout.align.pref.bytes()));
+
+        for (i, (ty, dw_ty)) in components.into_iter().enumerate() {
+            let member_id = self.dwarf.unit.add(tuple_type_id, gimli::DW_TAG_member);
+            let member_entry = self.dwarf.unit.get_mut(member_id);
+            member_entry.set(
+                gimli::DW_AT_name,
+                AttributeValue::StringRef(self.dwarf.strings.add(format!("__{i}"))),
+            );
+            member_entry.set(gimli::DW_AT_type, AttributeValue::UnitRef(dw_ty));
+            member_entry.set(
+                gimli::DW_AT_alignment,
+                AttributeValue::Udata(RevealAllLayoutCx(tcx).layout_of(ty).align.pref.bytes()),
+            );
+            member_entry.set(
+                gimli::DW_AT_data_member_location,
+                AttributeValue::Udata(layout.fields.offset(i).bytes()),
+            );
+        }
+
+        tuple_type_id
+    }
+
+    fn placeholder_for_type<'tcx>(
+        &mut self,
+        tcx: TyCtxt<'tcx>,
+        type_dbg: &mut TypeDebugContext<'tcx>,
+        ty: Ty<'tcx>,
+    ) -> UnitEntryId {
+        self.debug_type(
+            tcx,
+            type_dbg,
+            Ty::new_array(tcx, tcx.types.u8, RevealAllLayoutCx(tcx).layout_of(ty).size.bytes()),
+        )
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs
new file mode 100644
index 00000000000..eebd181341d
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs
@@ -0,0 +1,142 @@
+//! Unwind info generation (`.eh_frame`)
+
+use cranelift_codegen::ir::Endianness;
+use cranelift_codegen::isa::{unwind::UnwindInfo, TargetIsa};
+use cranelift_object::ObjectProduct;
+use gimli::write::{CieId, EhFrame, FrameTable, Section};
+use gimli::RunTimeEndian;
+
+use super::emit::address_for_func;
+use super::object::WriteDebugInfo;
+use crate::prelude::*;
+
+pub(crate) struct UnwindContext {
+    endian: RunTimeEndian,
+    frame_table: FrameTable,
+    cie_id: Option<CieId>,
+}
+
+impl UnwindContext {
+    pub(crate) fn new(isa: &dyn TargetIsa, pic_eh_frame: bool) -> Self {
+        let endian = match isa.endianness() {
+            Endianness::Little => RunTimeEndian::Little,
+            Endianness::Big => RunTimeEndian::Big,
+        };
+        let mut frame_table = FrameTable::default();
+
+        let cie_id = if let Some(mut cie) = isa.create_systemv_cie() {
+            if pic_eh_frame {
+                cie.fde_address_encoding =
+                    gimli::DwEhPe(gimli::DW_EH_PE_pcrel.0 | gimli::DW_EH_PE_sdata4.0);
+            }
+            Some(frame_table.add_cie(cie))
+        } else {
+            None
+        };
+
+        UnwindContext { endian, frame_table, cie_id }
+    }
+
+    pub(crate) fn add_function(&mut self, func_id: FuncId, context: &Context, isa: &dyn TargetIsa) {
+        if let target_lexicon::OperatingSystem::MacOSX { .. } = isa.triple().operating_system {
+            // The object crate doesn't currently support DW_GNU_EH_PE_absptr, which macOS
+            // requires for unwinding tables. In addition, on arm64 it currently doesn't
+            // support the 32bit relocations we use for the unwinding table.
+            // See gimli-rs/object#415 and rust-lang/rustc_codegen_cranelift#1371
+            return;
+        }
+
+        let unwind_info = if let Some(unwind_info) =
+            context.compiled_code().unwrap().create_unwind_info(isa).unwrap()
+        {
+            unwind_info
+        } else {
+            return;
+        };
+
+        match unwind_info {
+            UnwindInfo::SystemV(unwind_info) => {
+                self.frame_table
+                    .add_fde(self.cie_id.unwrap(), unwind_info.to_fde(address_for_func(func_id)));
+            }
+            UnwindInfo::WindowsX64(_) => {
+                // FIXME implement this
+            }
+            unwind_info => unimplemented!("{:?}", unwind_info),
+        }
+    }
+
+    pub(crate) fn emit(self, product: &mut ObjectProduct) {
+        let mut eh_frame = EhFrame::from(super::emit::WriterRelocate::new(self.endian));
+        self.frame_table.write_eh_frame(&mut eh_frame).unwrap();
+
+        if !eh_frame.0.writer.slice().is_empty() {
+            let id = eh_frame.id();
+            let section_id = product.add_debug_section(id, eh_frame.0.writer.into_vec());
+            let mut section_map = FxHashMap::default();
+            section_map.insert(id, section_id);
+
+            for reloc in &eh_frame.0.relocs {
+                product.add_debug_reloc(&section_map, &section_id, reloc);
+            }
+        }
+    }
+
+    #[cfg(all(feature = "jit", windows))]
+    pub(crate) unsafe fn register_jit(self, _jit_module: &cranelift_jit::JITModule) {}
+
+    #[cfg(all(feature = "jit", not(windows)))]
+    pub(crate) unsafe fn register_jit(self, jit_module: &cranelift_jit::JITModule) {
+        use std::mem::ManuallyDrop;
+
+        let mut eh_frame = EhFrame::from(super::emit::WriterRelocate::new(self.endian));
+        self.frame_table.write_eh_frame(&mut eh_frame).unwrap();
+
+        if eh_frame.0.writer.slice().is_empty() {
+            return;
+        }
+
+        let mut eh_frame = eh_frame.0.relocate_for_jit(jit_module);
+
+        // GCC expects a terminating "empty" length, so write a 0 length at the end of the table.
+        eh_frame.extend(&[0, 0, 0, 0]);
+
+        // FIXME support unregistering unwind tables once cranelift-jit supports deallocating
+        // individual functions
+        let eh_frame = ManuallyDrop::new(eh_frame);
+
+        // =======================================================================
+        // Everything after this line up to the end of the file is loosely based on
+        // https://github.com/bytecodealliance/wasmtime/blob/4471a82b0c540ff48960eca6757ccce5b1b5c3e4/crates/jit/src/unwind/systemv.rs
+        #[cfg(target_os = "macos")]
+        {
+            // On macOS, `__register_frame` takes a pointer to a single FDE
+            let start = eh_frame.as_ptr();
+            let end = start.add(eh_frame.len());
+            let mut current = start;
+
+            // Walk all of the entries in the frame table and register them
+            while current < end {
+                let len = std::ptr::read::<u32>(current as *const u32) as usize;
+
+                // Skip over the CIE
+                if current != start {
+                    __register_frame(current);
+                }
+
+                // Move to the next table entry (+4 because the length itself is not inclusive)
+                current = current.add(len + 4);
+            }
+        }
+        #[cfg(not(target_os = "macos"))]
+        {
+            // On other platforms, `__register_frame` will walk the FDEs until an entry of length 0
+            __register_frame(eh_frame.as_ptr());
+        }
+    }
+}
+
+extern "C" {
+    // libunwind import
+    fn __register_frame(fde: *const u8);
+}
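The macOS branch of `register_jit` walks `.eh_frame` manually: every entry starts with a 4-byte length that does not count the length field itself, the first entry is the CIE, and each following FDE is handed to `__register_frame`. A standalone sketch of that walk (little-endian lengths assumed, as on all macOS targets):

fn fde_offsets(eh_frame: &[u8]) -> Vec<usize> {
    let mut out = Vec::new();
    let mut current = 0usize;
    while current + 4 <= eh_frame.len() {
        let len =
            u32::from_le_bytes(eh_frame[current..current + 4].try_into().unwrap()) as usize;
        if len == 0 {
            break; // GCC-style terminator
        }
        if current != 0 {
            out.push(current); // skip the CIE at offset 0
        }
        // +4 because the length field itself is not included in `len`.
        current += len + 4;
    }
    out
}

fn main() {
    // [CIE: len 4 + 4 body bytes][FDE: len 4 + 4 body bytes][terminator]
    let frame = [
        4, 0, 0, 0, 0, 0, 0, 0, // CIE
        4, 0, 0, 0, 0, 0, 0, 0, // FDE at offset 8
        0, 0, 0, 0, // terminator
    ];
    assert_eq!(fde_offsets(&frame), vec![8]);
}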
diff --git a/compiler/rustc_codegen_cranelift/src/discriminant.rs b/compiler/rustc_codegen_cranelift/src/discriminant.rs
new file mode 100644
index 00000000000..e7ac084558a
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/discriminant.rs
@@ -0,0 +1,214 @@
+//! Handling of enum discriminants
+//!
+//! Adapted from <https://github.com/rust-lang/rust/blob/31c0645b9d2539f47eecb096142474b29dc542f7/compiler/rustc_codegen_ssa/src/mir/place.rs>
+//! (<https://github.com/rust-lang/rust/pull/104535>)
+
+use rustc_target::abi::{Int, TagEncoding, Variants};
+
+use crate::prelude::*;
+
+pub(crate) fn codegen_set_discriminant<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    place: CPlace<'tcx>,
+    variant_index: VariantIdx,
+) {
+    let layout = place.layout();
+    if layout.for_variant(fx, variant_index).abi.is_uninhabited() {
+        return;
+    }
+    match layout.variants {
+        Variants::Single { index } => {
+            assert_eq!(index, variant_index);
+        }
+        Variants::Multiple {
+            tag: _,
+            tag_field,
+            tag_encoding: TagEncoding::Direct,
+            variants: _,
+        } => {
+            let ptr = place.place_field(fx, FieldIdx::new(tag_field));
+            let to = layout.ty.discriminant_for_variant(fx.tcx, variant_index).unwrap().val;
+            let to = match ptr.layout().ty.kind() {
+                ty::Uint(UintTy::U128) | ty::Int(IntTy::I128) => {
+                    let lsb = fx.bcx.ins().iconst(types::I64, to as u64 as i64);
+                    let msb = fx.bcx.ins().iconst(types::I64, (to >> 64) as u64 as i64);
+                    fx.bcx.ins().iconcat(lsb, msb)
+                }
+                ty::Uint(_) | ty::Int(_) => {
+                    let clif_ty = fx.clif_type(ptr.layout().ty).unwrap();
+                    let raw_val = ptr.layout().size.truncate(to);
+                    fx.bcx.ins().iconst(clif_ty, raw_val as i64)
+                }
+                _ => unreachable!(),
+            };
+            let discr = CValue::by_val(to, ptr.layout());
+            ptr.write_cvalue(fx, discr);
+        }
+        Variants::Multiple {
+            tag: _,
+            tag_field,
+            tag_encoding: TagEncoding::Niche { untagged_variant, ref niche_variants, niche_start },
+            variants: _,
+        } => {
+            if variant_index != untagged_variant {
+                let niche = place.place_field(fx, FieldIdx::new(tag_field));
+                let niche_type = fx.clif_type(niche.layout().ty).unwrap();
+                let niche_value = variant_index.as_u32() - niche_variants.start().as_u32();
+                let niche_value = (niche_value as u128).wrapping_add(niche_start);
+                let niche_value = match niche_type {
+                    types::I128 => {
+                        let lsb = fx.bcx.ins().iconst(types::I64, niche_value as u64 as i64);
+                        let msb =
+                            fx.bcx.ins().iconst(types::I64, (niche_value >> 64) as u64 as i64);
+                        fx.bcx.ins().iconcat(lsb, msb)
+                    }
+                    ty => fx.bcx.ins().iconst(ty, niche_value as i64),
+                };
+                let niche_llval = CValue::by_val(niche_value, niche.layout());
+                niche.write_cvalue(fx, niche_llval);
+            }
+        }
+    }
+}
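The niche arithmetic above (and its inverse described in the comments of `codegen_get_discriminant` below) is easiest to see on a concrete layout. A standalone worked example using the layout rustc picks for `Option<bool>`, where the `bool` payload only uses the byte values 0 and 1 and `None` borrows byte 2:

// Encode: the niche tag is (variant - niche_variants.start) + niche_start.
fn niche_tag(variant: u32, niche_variants_start: u32, niche_start: u128) -> u128 {
    u128::from(variant - niche_variants_start).wrapping_add(niche_start)
}

// Decode: relative = tag - niche_start; an in-range relative value means a
// niched variant, anything else is the untagged variant.
fn decode_niche(
    tag: u128,
    niche_start: u128,
    relative_max: u32,
    niche_variants_start: u32,
    untagged_variant: u32,
) -> u32 {
    let relative = tag.wrapping_sub(niche_start);
    if relative <= u128::from(relative_max) {
        relative as u32 + niche_variants_start
    } else {
        untagged_variant
    }
}

fn main() {
    // Option<bool>: None (variant 0) is the only niched variant; Some is the
    // untagged variant 1 and stores the bool directly.
    assert_eq!(niche_tag(0, 0, 2), 2); // None is stored as the byte 2
    assert_eq!(decode_niche(2, 2, 0, 0, 1), 0); // byte 2 -> None
    assert_eq!(decode_niche(0, 2, 0, 0, 1), 1); // byte 0 -> Some(false)
    assert_eq!(decode_niche(1, 2, 0, 0, 1), 1); // byte 1 -> Some(true)
}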
+
+pub(crate) fn codegen_get_discriminant<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    dest: CPlace<'tcx>,
+    value: CValue<'tcx>,
+    dest_layout: TyAndLayout<'tcx>,
+) {
+    let layout = value.layout();
+
+    if layout.abi.is_uninhabited() {
+        return;
+    }
+
+    let (tag_scalar, tag_field, tag_encoding) = match &layout.variants {
+        Variants::Single { index } => {
+            let discr_val = layout
+                .ty
+                .discriminant_for_variant(fx.tcx, *index)
+                .map_or(u128::from(index.as_u32()), |discr| discr.val);
+
+            let val = match dest_layout.ty.kind() {
+                ty::Uint(UintTy::U128) | ty::Int(IntTy::I128) => {
+                    let lsb = fx.bcx.ins().iconst(types::I64, discr_val as u64 as i64);
+                    let msb = fx.bcx.ins().iconst(types::I64, (discr_val >> 64) as u64 as i64);
+                    fx.bcx.ins().iconcat(lsb, msb)
+                }
+                ty::Uint(_) | ty::Int(_) => {
+                    let clif_ty = fx.clif_type(dest_layout.ty).unwrap();
+                    let raw_val = dest_layout.size.truncate(discr_val);
+                    fx.bcx.ins().iconst(clif_ty, raw_val as i64)
+                }
+                _ => unreachable!(),
+            };
+            let res = CValue::by_val(val, dest_layout);
+            dest.write_cvalue(fx, res);
+            return;
+        }
+        Variants::Multiple { tag, tag_field, tag_encoding, variants: _ } => {
+            (tag, *tag_field, tag_encoding)
+        }
+    };
+
+    let cast_to = fx.clif_type(dest_layout.ty).unwrap();
+
+    // Read the tag/niche-encoded discriminant from memory.
+    let tag = value.value_field(fx, FieldIdx::new(tag_field));
+    let tag = tag.load_scalar(fx);
+
+    // Decode the discriminant (specifically if it's niche-encoded).
+    match *tag_encoding {
+        TagEncoding::Direct => {
+            let signed = match tag_scalar.primitive() {
+                Int(_, signed) => signed,
+                _ => false,
+            };
+            let val = clif_intcast(fx, tag, cast_to, signed);
+            let res = CValue::by_val(val, dest_layout);
+            dest.write_cvalue(fx, res);
+        }
+        TagEncoding::Niche { untagged_variant, ref niche_variants, niche_start } => {
+            let relative_max = niche_variants.end().as_u32() - niche_variants.start().as_u32();
+
+            // We have a subrange `niche_start..=niche_end` inside `range`.
+            // If the value of the tag is inside this subrange, it's a
+            // "niche value", an increment of the discriminant. Otherwise it
+            // indicates the untagged variant.
+            // A general algorithm to extract the discriminant from the tag
+            // is:
+            // relative_tag = tag - niche_start
+            // is_niche = relative_tag <= (ule) relative_max
+            // discr = if is_niche {
+            //     cast(relative_tag) + niche_variants.start()
+            // } else {
+            //     untagged_variant
+            // }
+            // However, we will likely be able to emit simpler code.
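+            //
+            // As a concrete example (hypothetical layout): for an enum like
+            // `enum E { A(bool), B, C }` where `A` is the untagged variant and
+            // `B`/`C` are niche-encoded into the invalid `bool` values 2 and 3
+            // (niche_start = 2, niche_variants = 1..=2), this becomes:
+            // relative_tag = tag - 2
+            // is_niche = relative_tag <= (ule) 1
+            // discr = if is_niche { relative_tag + 1 } else { 0 /* A */ }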
+
+            let (is_niche, tagged_discr, delta) = if relative_max == 0 {
+                // Best case scenario: only one tagged variant. This will
+                // likely become just a comparison and a jump.
+                // The algorithm is:
+                // is_niche = tag == niche_start
+                // discr = if is_niche {
+                //     niche_variants.start()
+                // } else {
+                //     untagged_variant
+                // }
+                let is_niche = codegen_icmp_imm(fx, IntCC::Equal, tag, niche_start as i128);
+                let tagged_discr =
+                    fx.bcx.ins().iconst(cast_to, niche_variants.start().as_u32() as i64);
+                (is_niche, tagged_discr, 0)
+            } else {
+                // The special cases don't apply, so we'll have to go with
+                // the general algorithm.
+                let niche_start = match fx.bcx.func.dfg.value_type(tag) {
+                    types::I128 => {
+                        let lsb = fx.bcx.ins().iconst(types::I64, niche_start as u64 as i64);
+                        let msb =
+                            fx.bcx.ins().iconst(types::I64, (niche_start >> 64) as u64 as i64);
+                        fx.bcx.ins().iconcat(lsb, msb)
+                    }
+                    ty => fx.bcx.ins().iconst(ty, niche_start as i64),
+                };
+                let relative_discr = fx.bcx.ins().isub(tag, niche_start);
+                let cast_tag = clif_intcast(fx, relative_discr, cast_to, false);
+                let is_niche = crate::common::codegen_icmp_imm(
+                    fx,
+                    IntCC::UnsignedLessThanOrEqual,
+                    relative_discr,
+                    i128::from(relative_max),
+                );
+                (is_niche, cast_tag, niche_variants.start().as_u32() as u128)
+            };
+
+            let tagged_discr = if delta == 0 {
+                tagged_discr
+            } else {
+                let delta = match cast_to {
+                    types::I128 => {
+                        let lsb = fx.bcx.ins().iconst(types::I64, delta as u64 as i64);
+                        let msb = fx.bcx.ins().iconst(types::I64, (delta >> 64) as u64 as i64);
+                        fx.bcx.ins().iconcat(lsb, msb)
+                    }
+                    ty => fx.bcx.ins().iconst(ty, delta as i64),
+                };
+                fx.bcx.ins().iadd(tagged_discr, delta)
+            };
+
+            let untagged_variant = if cast_to == types::I128 {
+                let zero = fx.bcx.ins().iconst(types::I64, 0);
+                let untagged_variant =
+                    fx.bcx.ins().iconst(types::I64, i64::from(untagged_variant.as_u32()));
+                fx.bcx.ins().iconcat(untagged_variant, zero)
+            } else {
+                fx.bcx.ins().iconst(cast_to, i64::from(untagged_variant.as_u32()))
+            };
+            let discr = fx.bcx.ins().select(is_niche, tagged_discr, untagged_variant);
+            let res = CValue::by_val(discr, dest_layout);
+            dest.write_cvalue(fx, res);
+        }
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
new file mode 100644
index 00000000000..fce4690f97d
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -0,0 +1,712 @@
+//! The AOT driver uses [`cranelift_object`] to write object files suitable for linking into a
+//! standalone executable.
+
+use std::fs::{self, File};
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::thread::JoinHandle;
+
+use cranelift_object::{ObjectBuilder, ObjectModule};
+use rustc_codegen_ssa::assert_module_sources::CguReuse;
+use rustc_codegen_ssa::back::link::ensure_removed;
+use rustc_codegen_ssa::back::metadata::create_compressed_metadata_file;
+use rustc_codegen_ssa::base::determine_cgu_reuse;
+use rustc_codegen_ssa::errors as ssa_errors;
+use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, ModuleKind};
+use rustc_data_structures::profiling::SelfProfilerRef;
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
+use rustc_data_structures::sync::{par_map, IntoDynSyncSend};
+use rustc_metadata::fs::copy_to_stdout;
+use rustc_metadata::EncodedMetadata;
+use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
+use rustc_middle::mir::mono::{CodegenUnit, MonoItem};
+use rustc_session::config::{DebugInfo, OutFileName, OutputFilenames, OutputType};
+use rustc_session::Session;
+
+use crate::concurrency_limiter::{ConcurrencyLimiter, ConcurrencyLimiterToken};
+use crate::debuginfo::TypeDebugContext;
+use crate::global_asm::GlobalAsmConfig;
+use crate::{prelude::*, BackendConfig};
+
+struct ModuleCodegenResult {
+    module_regular: CompiledModule,
+    module_global_asm: Option<CompiledModule>,
+    existing_work_product: Option<(WorkProductId, WorkProduct)>,
+}
+
+enum OngoingModuleCodegen {
+    Sync(Result<ModuleCodegenResult, String>),
+    Async(JoinHandle<Result<ModuleCodegenResult, String>>),
+}
+
+impl<HCX> HashStable<HCX> for OngoingModuleCodegen {
+    fn hash_stable(&self, _: &mut HCX, _: &mut StableHasher) {
+        // do nothing
+    }
+}
+
+pub(crate) struct OngoingCodegen {
+    modules: Vec<OngoingModuleCodegen>,
+    allocator_module: Option<CompiledModule>,
+    metadata_module: Option<CompiledModule>,
+    metadata: EncodedMetadata,
+    crate_info: CrateInfo,
+    concurrency_limiter: ConcurrencyLimiter,
+}
+
+impl OngoingCodegen {
+    pub(crate) fn join(
+        self,
+        sess: &Session,
+        outputs: &OutputFilenames,
+        backend_config: &BackendConfig,
+    ) -> (CodegenResults, FxIndexMap<WorkProductId, WorkProduct>) {
+        let mut work_products = FxIndexMap::default();
+        let mut modules = vec![];
+
+        for module_codegen in self.modules {
+            let module_codegen_result = match module_codegen {
+                OngoingModuleCodegen::Sync(module_codegen_result) => module_codegen_result,
+                OngoingModuleCodegen::Async(join_handle) => match join_handle.join() {
+                    Ok(module_codegen_result) => module_codegen_result,
+                    Err(panic) => std::panic::resume_unwind(panic),
+                },
+            };
+
+            let module_codegen_result = match module_codegen_result {
+                Ok(module_codegen_result) => module_codegen_result,
+                Err(err) => sess.dcx().fatal(err),
+            };
+            let ModuleCodegenResult { module_regular, module_global_asm, existing_work_product } =
+                module_codegen_result;
+
+            if let Some((work_product_id, work_product)) = existing_work_product {
+                work_products.insert(work_product_id, work_product);
+            } else {
+                let work_product = if backend_config.disable_incr_cache {
+                    None
+                } else if let Some(module_global_asm) = &module_global_asm {
+                    rustc_incremental::copy_cgu_workproduct_to_incr_comp_cache_dir(
+                        sess,
+                        &module_regular.name,
+                        &[
+                            ("o", &module_regular.object.as_ref().unwrap()),
+                            ("asm.o", &module_global_asm.object.as_ref().unwrap()),
+                        ],
+                    )
+                } else {
+                    rustc_incremental::copy_cgu_workproduct_to_incr_comp_cache_dir(
+                        sess,
+                        &module_regular.name,
+                        &[("o", &module_regular.object.as_ref().unwrap())],
+                    )
+                };
+                if let Some((work_product_id, work_product)) = work_product {
+                    work_products.insert(work_product_id, work_product);
+                }
+            }
+
+            modules.push(module_regular);
+            if let Some(module_global_asm) = module_global_asm {
+                modules.push(module_global_asm);
+            }
+        }
+
+        self.concurrency_limiter.finished();
+
+        sess.dcx().abort_if_errors();
+
+        let codegen_results = CodegenResults {
+            modules,
+            allocator_module: self.allocator_module,
+            metadata_module: self.metadata_module,
+            metadata: self.metadata,
+            crate_info: self.crate_info,
+        };
+
+        produce_final_output_artifacts(sess, &codegen_results, outputs);
+
+        (codegen_results, work_products)
+    }
+}
+
+// Adapted from https://github.com/rust-lang/rust/blob/73476d49904751f8d90ce904e16dfbc278083d2c/compiler/rustc_codegen_ssa/src/back/write.rs#L547C1-L706C2
+fn produce_final_output_artifacts(
+    sess: &Session,
+    codegen_results: &CodegenResults,
+    crate_output: &OutputFilenames,
+) {
+    let user_wants_bitcode = false;
+    let mut user_wants_objects = false;
+
+    // Produce final compile outputs.
+    let copy_gracefully = |from: &Path, to: &OutFileName| match to {
+        OutFileName::Stdout => {
+            if let Err(e) = copy_to_stdout(from) {
+                sess.dcx().emit_err(ssa_errors::CopyPath::new(from, to.as_path(), e));
+            }
+        }
+        OutFileName::Real(path) => {
+            if let Err(e) = fs::copy(from, path) {
+                sess.dcx().emit_err(ssa_errors::CopyPath::new(from, path, e));
+            }
+        }
+    };
+
+    let copy_if_one_unit = |output_type: OutputType, keep_numbered: bool| {
+        if codegen_results.modules.len() == 1 {
+            // 1) Only one codegen unit. In this case it's trivial to copy
+            //    `foo.0.x` to `foo.x`.
+            let module_name = Some(&codegen_results.modules[0].name[..]);
+            let path = crate_output.temp_path(output_type, module_name);
+            let output = crate_output.path(output_type);
+            if !output_type.is_text_output() && output.is_tty() {
+                sess.dcx()
+                    .emit_err(ssa_errors::BinaryOutputToTty { shorthand: output_type.shorthand() });
+            } else {
+                copy_gracefully(&path, &output);
+            }
+            if !sess.opts.cg.save_temps && !keep_numbered {
+                // The user just wants `foo.x`, not `foo.#module-name#.x`.
+                ensure_removed(sess.dcx(), &path);
+            }
+        } else {
+            let extension = crate_output
+                .temp_path(output_type, None)
+                .extension()
+                .unwrap()
+                .to_str()
+                .unwrap()
+                .to_owned();
+
+            if crate_output.outputs.contains_explicit_name(&output_type) {
+                // 2) Multiple codegen units, with `--emit foo=some_name`. We have
+                //    no good solution for this case, so warn the user.
+                sess.dcx().emit_warn(ssa_errors::IgnoringEmitPath { extension });
+            } else if crate_output.single_output_file.is_some() {
+                // 3) Multiple codegen units, with `-o some_name`. We have
+                //    no good solution for this case, so warn the user.
+                sess.dcx().emit_warn(ssa_errors::IgnoringOutput { extension });
+            } else {
+                // 4) Multiple codegen units, but no explicit name. We
+                //    just leave the `foo.0.x` files in place.
+                // (We don't have to do any work in this case.)
+            }
+        }
+    };
+
+    // Check whether the user explicitly requested each output type. Outputs
+    // that were only produced as temporaries will need to be removed again
+    // afterwards.
+    for output_type in crate_output.outputs.keys() {
+        match *output_type {
+            OutputType::Bitcode => {
+                // Cranelift doesn't have bitcode
+                // user_wants_bitcode = true;
+                // // Copy to .bc, but always keep the .0.bc. There is a later
+                // // check to figure out if we should delete .0.bc files, or keep
+                // // them for making an rlib.
+                // copy_if_one_unit(OutputType::Bitcode, true);
+            }
+            OutputType::LlvmAssembly => {
+                // Cranelift IR text already emitted during codegen
+                // copy_if_one_unit(OutputType::LlvmAssembly, false);
+            }
+            OutputType::Assembly => {
+                // Currently no support for emitting raw assembly files
+                // copy_if_one_unit(OutputType::Assembly, false);
+            }
+            OutputType::Object => {
+                user_wants_objects = true;
+                copy_if_one_unit(OutputType::Object, true);
+            }
+            OutputType::Mir | OutputType::Metadata | OutputType::Exe | OutputType::DepInfo => {}
+        }
+    }
+
+    // Clean up unwanted temporary files.
+
+    // We create the following files by default:
+    //  - #crate#.#module-name#.bc
+    //  - #crate#.#module-name#.o
+    //  - #crate#.crate.metadata.bc
+    //  - #crate#.crate.metadata.o
+    //  - #crate#.o (linked from crate.##.o)
+    //  - #crate#.bc (copied from crate.##.bc)
+    // We may create additional files if requested by the user (through
+    // `-C save-temps` or `--emit=` flags).
+
+    if !sess.opts.cg.save_temps {
+        // Remove the temporary .#module-name#.o objects. If the user didn't
+        // explicitly request bitcode (with --emit=bc), and the bitcode is not
+        // needed for building an rlib, then we must remove .#module-name#.bc as
+        // well.
+
+        // Specific rules for keeping .#module-name#.bc:
+        //  - If the user requested bitcode (`user_wants_bitcode`), and
+        //    codegen_units > 1, then keep it.
+        //  - If the user requested bitcode but codegen_units == 1, then we
+        //    can toss .#module-name#.bc because we copied it to .bc earlier.
+        //  - If we're not building an rlib and the user didn't request
+        //    bitcode, then delete .#module-name#.bc.
+        // If you change how this works, also update back::link::link_rlib,
+        // where .#module-name#.bc files are (maybe) deleted after making an
+        // rlib.
+        let needs_crate_object = crate_output.outputs.contains_key(&OutputType::Exe);
+
+        let keep_numbered_bitcode = user_wants_bitcode && sess.codegen_units().as_usize() > 1;
+
+        let keep_numbered_objects =
+            needs_crate_object || (user_wants_objects && sess.codegen_units().as_usize() > 1);
+
+        for module in codegen_results.modules.iter() {
+            if let Some(ref path) = module.object {
+                if !keep_numbered_objects {
+                    ensure_removed(sess.dcx(), path);
+                }
+            }
+
+            if let Some(ref path) = module.dwarf_object {
+                if !keep_numbered_objects {
+                    ensure_removed(sess.dcx(), path);
+                }
+            }
+
+            if let Some(ref path) = module.bytecode {
+                if !keep_numbered_bitcode {
+                    ensure_removed(sess.dcx(), path);
+                }
+            }
+        }
+
+        if !user_wants_bitcode {
+            if let Some(ref allocator_module) = codegen_results.allocator_module {
+                if let Some(ref path) = allocator_module.bytecode {
+                    ensure_removed(sess.dcx(), path);
+                }
+            }
+        }
+    }
+
+    // We leave the following files around by default:
+    //  - #crate#.o
+    //  - #crate#.crate.metadata.o
+    //  - #crate#.bc
+    // These are used in linking steps and will be cleaned up afterward.
+}
+
+fn make_module(sess: &Session, backend_config: &BackendConfig, name: String) -> ObjectModule {
+    let isa = crate::build_isa(sess, backend_config);
+
+    let mut builder =
+        ObjectBuilder::new(isa, name + ".o", cranelift_module::default_libcall_names()).unwrap();
+    // Unlike cg_llvm, cg_clif defaults to disabling -Zfunction-sections. For cg_llvm, binary size
+    // is important, while cg_clif cares more about compilation time. Enabling -Zfunction-sections
+    // can easily double the time necessary to perform linking.
+    builder.per_function_section(sess.opts.unstable_opts.function_sections.unwrap_or(false));
+    ObjectModule::new(builder)
+}
+
+fn emit_cgu(
+    output_filenames: &OutputFilenames,
+    prof: &SelfProfilerRef,
+    name: String,
+    module: ObjectModule,
+    debug: Option<DebugContext>,
+    unwind_context: UnwindContext,
+    global_asm_object_file: Option<PathBuf>,
+    producer: &str,
+) -> Result<ModuleCodegenResult, String> {
+    let mut product = module.finish();
+
+    if let Some(mut debug) = debug {
+        debug.emit(&mut product);
+    }
+
+    unwind_context.emit(&mut product);
+
+    let module_regular = emit_module(
+        output_filenames,
+        prof,
+        product.object,
+        ModuleKind::Regular,
+        name.clone(),
+        producer,
+    )?;
+
+    Ok(ModuleCodegenResult {
+        module_regular,
+        module_global_asm: global_asm_object_file.map(|global_asm_object_file| CompiledModule {
+            name: format!("{name}.asm"),
+            kind: ModuleKind::Regular,
+            object: Some(global_asm_object_file),
+            dwarf_object: None,
+            bytecode: None,
+            assembly: None,
+            llvm_ir: None,
+        }),
+        existing_work_product: None,
+    })
+}
+
+fn emit_module(
+    output_filenames: &OutputFilenames,
+    prof: &SelfProfilerRef,
+    mut object: cranelift_object::object::write::Object<'_>,
+    kind: ModuleKind,
+    name: String,
+    producer_str: &str,
+) -> Result<CompiledModule, String> {
+    if object.format() == cranelift_object::object::BinaryFormat::Elf {
+        let comment_section = object.add_section(
+            Vec::new(),
+            b".comment".to_vec(),
+            cranelift_object::object::SectionKind::OtherString,
+        );
+        let mut producer = vec![0];
+        producer.extend(producer_str.as_bytes());
+        producer.push(0);
+        object.set_section_data(comment_section, producer, 1);
+    }
+
+    let tmp_file = output_filenames.temp_path(OutputType::Object, Some(&name));
+    let mut file = match File::create(&tmp_file) {
+        Ok(file) => file,
+        Err(err) => return Err(format!("error creating object file: {}", err)),
+    };
+
+    if let Err(err) = object.write_stream(&mut file) {
+        return Err(format!("error writing object file: {}", err));
+    }
+
+    prof.artifact_size("object_file", &*name, file.metadata().unwrap().len());
+
+    Ok(CompiledModule {
+        name,
+        kind,
+        object: Some(tmp_file),
+        dwarf_object: None,
+        bytecode: None,
+        assembly: None,
+        llvm_ir: None,
+    })
+}
+
+fn reuse_workproduct_for_cgu(
+    tcx: TyCtxt<'_>,
+    cgu: &CodegenUnit<'_>,
+) -> Result<ModuleCodegenResult, String> {
+    let work_product = cgu.previous_work_product(tcx);
+    let obj_out_regular =
+        tcx.output_filenames(()).temp_path(OutputType::Object, Some(cgu.name().as_str()));
+    let source_file_regular = rustc_incremental::in_incr_comp_dir_sess(
+        &tcx.sess,
+        &work_product.saved_files.get("o").expect("no saved object file in work product"),
+    );
+
+    if let Err(err) = rustc_fs_util::link_or_copy(&source_file_regular, &obj_out_regular) {
+        return Err(format!(
+            "unable to copy {} to {}: {}",
+            source_file_regular.display(),
+            obj_out_regular.display(),
+            err
+        ));
+    }
+    let obj_out_global_asm =
+        crate::global_asm::add_file_stem_postfix(obj_out_regular.clone(), ".asm");
+    let has_global_asm = if let Some(asm_o) = work_product.saved_files.get("asm.o") {
+        let source_file_global_asm = rustc_incremental::in_incr_comp_dir_sess(&tcx.sess, asm_o);
+        if let Err(err) = rustc_fs_util::link_or_copy(&source_file_global_asm, &obj_out_global_asm)
+        {
+            return Err(format!(
+                "unable to copy {} to {}: {}",
+                source_file_global_asm.display(),
+                obj_out_global_asm.display(),
+                err
+            ));
+        }
+        true
+    } else {
+        false
+    };
+
+    Ok(ModuleCodegenResult {
+        module_regular: CompiledModule {
+            name: cgu.name().to_string(),
+            kind: ModuleKind::Regular,
+            object: Some(obj_out_regular),
+            dwarf_object: None,
+            bytecode: None,
+            assembly: None,
+            llvm_ir: None,
+        },
+        module_global_asm: has_global_asm.then(|| CompiledModule {
+            name: cgu.name().to_string(),
+            kind: ModuleKind::Regular,
+            object: Some(obj_out_global_asm),
+            dwarf_object: None,
+            bytecode: None,
+            assembly: None,
+            llvm_ir: None,
+        }),
+        existing_work_product: Some((cgu.work_product_id(), work_product)),
+    })
+}
+
+fn module_codegen(
+    tcx: TyCtxt<'_>,
+    (backend_config, global_asm_config, cgu_name, token): (
+        BackendConfig,
+        Arc<GlobalAsmConfig>,
+        rustc_span::Symbol,
+        ConcurrencyLimiterToken,
+    ),
+) -> OngoingModuleCodegen {
+    let (cgu_name, mut cx, mut module, codegened_functions) =
+        tcx.prof.generic_activity_with_arg("codegen cgu", cgu_name.as_str()).run(|| {
+            let cgu = tcx.codegen_unit(cgu_name);
+            let mono_items = cgu.items_in_deterministic_order(tcx);
+
+            let mut module = make_module(tcx.sess, &backend_config, cgu_name.as_str().to_string());
+
+            let mut cx = crate::CodegenCx::new(
+                tcx,
+                backend_config.clone(),
+                module.isa(),
+                tcx.sess.opts.debuginfo != DebugInfo::None,
+                cgu_name,
+            );
+            let mut type_dbg = TypeDebugContext::default();
+            super::predefine_mono_items(tcx, &mut module, &mono_items);
+            let mut codegened_functions = vec![];
+            for (mono_item, _) in mono_items {
+                match mono_item {
+                    MonoItem::Fn(inst) => {
+                        if let Some(codegened_function) = crate::base::codegen_fn(
+                            tcx,
+                            &mut cx,
+                            &mut type_dbg,
+                            Function::new(),
+                            &mut module,
+                            inst,
+                        ) {
+                            codegened_functions.push(codegened_function);
+                        }
+                    }
+                    MonoItem::Static(def_id) => {
+                        let data_id = crate::constant::codegen_static(tcx, &mut module, def_id);
+                        if let Some(debug_context) = &mut cx.debug_context {
+                            debug_context.define_static(tcx, &mut type_dbg, def_id, data_id);
+                        }
+                    }
+                    MonoItem::GlobalAsm(item_id) => {
+                        crate::global_asm::codegen_global_asm_item(
+                            tcx,
+                            &mut cx.global_asm,
+                            item_id,
+                        );
+                    }
+                }
+            }
+            crate::main_shim::maybe_create_entry_wrapper(
+                tcx,
+                &mut module,
+                &mut cx.unwind_context,
+                false,
+                cgu.is_primary(),
+            );
+
+            let cgu_name = cgu.name().as_str().to_owned();
+
+            (cgu_name, cx, module, codegened_functions)
+        });
+
+    let producer = crate::debuginfo::producer(tcx.sess);
+
+    OngoingModuleCodegen::Async(std::thread::spawn(move || {
+        cx.profiler.clone().generic_activity_with_arg("compile functions", &*cgu_name).run(|| {
+            cranelift_codegen::timing::set_thread_profiler(Box::new(super::MeasuremeProfiler(
+                cx.profiler.clone(),
+            )));
+
+            let mut cached_context = Context::new();
+            for codegened_func in codegened_functions {
+                crate::base::compile_fn(&mut cx, &mut cached_context, &mut module, codegened_func);
+            }
+        });
+
+        let global_asm_object_file =
+            cx.profiler.generic_activity_with_arg("compile assembly", &*cgu_name).run(|| {
+                crate::global_asm::compile_global_asm(&global_asm_config, &cgu_name, &cx.global_asm)
+            })?;
+
+        let codegen_result =
+            cx.profiler.generic_activity_with_arg("write object file", &*cgu_name).run(|| {
+                emit_cgu(
+                    &global_asm_config.output_filenames,
+                    &cx.profiler,
+                    cgu_name,
+                    module,
+                    cx.debug_context,
+                    cx.unwind_context,
+                    global_asm_object_file,
+                    &producer,
+                )
+            });
+        std::mem::drop(token);
+        codegen_result
+    }))
+}
+
+pub(crate) fn run_aot(
+    tcx: TyCtxt<'_>,
+    backend_config: BackendConfig,
+    metadata: EncodedMetadata,
+    need_metadata_module: bool,
+) -> Box<OngoingCodegen> {
+    // FIXME handle `-Ctarget-cpu=native`
+    let target_cpu = match tcx.sess.opts.cg.target_cpu {
+        Some(ref name) => name,
+        None => tcx.sess.target.cpu.as_ref(),
+    }
+    .to_owned();
+
+    let cgus = if tcx.sess.opts.output_types.should_codegen() {
+        tcx.collect_and_partition_mono_items(()).1
+    } else {
+        // If only `--emit metadata` is used, we shouldn't perform any codegen.
+        // Also `tcx.collect_and_partition_mono_items` may panic in that case.
+        return Box::new(OngoingCodegen {
+            modules: vec![],
+            allocator_module: None,
+            metadata_module: None,
+            metadata,
+            crate_info: CrateInfo::new(tcx, target_cpu),
+            concurrency_limiter: ConcurrencyLimiter::new(tcx.sess, 0),
+        });
+    };
+
+    if tcx.dep_graph.is_fully_enabled() {
+        for cgu in cgus {
+            tcx.ensure().codegen_unit(cgu.name());
+        }
+    }
+
+    // Calculate the CGU reuse
+    let cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
+        cgus.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect::<Vec<_>>()
+    });
+
+    rustc_codegen_ssa::assert_module_sources::assert_module_sources(tcx, &|cgu_reuse_tracker| {
+        for (i, cgu) in cgus.iter().enumerate() {
+            let cgu_reuse = cgu_reuse[i];
+            cgu_reuse_tracker.set_actual_reuse(cgu.name().as_str(), cgu_reuse);
+        }
+    });
+
+    let global_asm_config = Arc::new(crate::global_asm::GlobalAsmConfig::new(tcx));
+
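+    // Split the CGUs into those that have to be compiled from scratch and
+    // those whose object files can be reused from the incremental cache (see
+    // `reuse_workproduct_for_cgu` below).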
+    let (todo_cgus, done_cgus) =
+        cgus.into_iter().enumerate().partition::<Vec<_>, _>(|&(i, _)| match cgu_reuse[i] {
+            _ if backend_config.disable_incr_cache => true,
+            CguReuse::No => true,
+            CguReuse::PreLto | CguReuse::PostLto => false,
+        });
+
+    let concurrency_limiter = IntoDynSyncSend(ConcurrencyLimiter::new(tcx.sess, todo_cgus.len()));
+
+    let modules = tcx.sess.time("codegen mono items", || {
+        let mut modules: Vec<_> = par_map(todo_cgus, |(_, cgu)| {
+            let dep_node = cgu.codegen_dep_node(tcx);
+            tcx.dep_graph
+                .with_task(
+                    dep_node,
+                    tcx,
+                    (
+                        backend_config.clone(),
+                        global_asm_config.clone(),
+                        cgu.name(),
+                        concurrency_limiter.acquire(tcx.dcx()),
+                    ),
+                    module_codegen,
+                    Some(rustc_middle::dep_graph::hash_result),
+                )
+                .0
+        });
+        modules.extend(
+            done_cgus
+                .into_iter()
+                .map(|(_, cgu)| OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu))),
+        );
+        modules
+    });
+
+    let mut allocator_module = make_module(tcx.sess, &backend_config, "allocator_shim".to_string());
+    let mut allocator_unwind_context = UnwindContext::new(allocator_module.isa(), true);
+    let created_alloc_shim =
+        crate::allocator::codegen(tcx, &mut allocator_module, &mut allocator_unwind_context);
+
+    let allocator_module = if created_alloc_shim {
+        let mut product = allocator_module.finish();
+        allocator_unwind_context.emit(&mut product);
+
+        match emit_module(
+            tcx.output_filenames(()),
+            &tcx.sess.prof,
+            product.object,
+            ModuleKind::Allocator,
+            "allocator_shim".to_owned(),
+            &crate::debuginfo::producer(tcx.sess),
+        ) {
+            Ok(allocator_module) => Some(allocator_module),
+            Err(err) => tcx.dcx().fatal(err),
+        }
+    } else {
+        None
+    };
+
+    let metadata_module = if need_metadata_module {
+        let (metadata_cgu_name, tmp_file) = tcx.sess.time("write compressed metadata", || {
+            use rustc_middle::mir::mono::CodegenUnitNameBuilder;
+
+            let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);
+            let metadata_cgu_name = cgu_name_builder
+                .build_cgu_name(LOCAL_CRATE, ["crate"], Some("metadata"))
+                .as_str()
+                .to_string();
+
+            let tmp_file =
+                tcx.output_filenames(()).temp_path(OutputType::Metadata, Some(&metadata_cgu_name));
+
+            let symbol_name = rustc_middle::middle::exported_symbols::metadata_symbol_name(tcx);
+            let obj = create_compressed_metadata_file(tcx.sess, &metadata, &symbol_name);
+
+            if let Err(err) = std::fs::write(&tmp_file, obj) {
+                tcx.dcx().fatal(format!("error writing metadata object file: {}", err));
+            }
+
+            (metadata_cgu_name, tmp_file)
+        });
+
+        Some(CompiledModule {
+            name: metadata_cgu_name,
+            kind: ModuleKind::Metadata,
+            object: Some(tmp_file),
+            dwarf_object: None,
+            bytecode: None,
+            assembly: None,
+            llvm_ir: None,
+        })
+    } else {
+        None
+    };
+
+    Box::new(OngoingCodegen {
+        modules,
+        allocator_module,
+        metadata_module,
+        metadata,
+        crate_info: CrateInfo::new(tcx, target_cpu),
+        concurrency_limiter: concurrency_limiter.0,
+    })
+}
diff --git a/compiler/rustc_codegen_cranelift/src/driver/jit.rs b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
new file mode 100644
index 00000000000..4b149131b61
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
@@ -0,0 +1,417 @@
+//! The JIT driver uses [`cranelift_jit`] to JIT execute programs without writing any object
+//! files.
+
+use std::cell::RefCell;
+use std::ffi::CString;
+use std::os::raw::{c_char, c_int};
+use std::sync::{mpsc, Mutex, OnceLock};
+
+use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
+use cranelift_jit::{JITBuilder, JITModule};
+use rustc_codegen_ssa::CrateInfo;
+use rustc_middle::mir::mono::MonoItem;
+use rustc_session::Session;
+use rustc_span::Symbol;
+
+use crate::debuginfo::TypeDebugContext;
+use crate::{prelude::*, BackendConfig};
+use crate::{CodegenCx, CodegenMode};
+
+struct JitState {
+    backend_config: BackendConfig,
+    jit_module: JITModule,
+}
+
+thread_local! {
+    static LAZY_JIT_STATE: RefCell<Option<JitState>> = const { RefCell::new(None) };
+}
+
+/// The Sender owned by the rustc thread
+static GLOBAL_MESSAGE_SENDER: OnceLock<Mutex<mpsc::Sender<UnsafeMessage>>> = OnceLock::new();
+
+/// A message that is sent from the jitted runtime to the rustc thread.
+/// Senders are responsible for upholding `Send` semantics.
+enum UnsafeMessage {
+    /// Request that the specified `Instance` be lazily jitted.
+    ///
+    /// Nothing accessible through `instance_ptr` may be moved or mutated by the sender after
+    /// this message is sent.
+    JitFn {
+        instance_ptr: *const Instance<'static>,
+        trampoline_ptr: *const u8,
+        tx: mpsc::Sender<*const u8>,
+    },
+}
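+// The raw pointers make this type `!Send` by default; the manual impl shifts
+// the responsibility to senders, per the invariants documented on `JitFn`.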
+unsafe impl Send for UnsafeMessage {}
+
+impl UnsafeMessage {
+    /// Send the message.
+    fn send(self) -> Result<(), mpsc::SendError<UnsafeMessage>> {
+        thread_local! {
+            /// The Sender owned by the local thread
+            static LOCAL_MESSAGE_SENDER: mpsc::Sender<UnsafeMessage> =
+                GLOBAL_MESSAGE_SENDER
+                    .get().unwrap()
+                    .lock().unwrap()
+                    .clone();
+        }
+        LOCAL_MESSAGE_SENDER.with(|sender| sender.send(self))
+    }
+}
+
+fn create_jit_module(
+    tcx: TyCtxt<'_>,
+    backend_config: &BackendConfig,
+    hotswap: bool,
+) -> (JITModule, CodegenCx) {
+    let crate_info = CrateInfo::new(tcx, "dummy_target_cpu".to_string());
+
+    let isa = crate::build_isa(tcx.sess, backend_config);
+    let mut jit_builder = JITBuilder::with_isa(isa, cranelift_module::default_libcall_names());
+    jit_builder.hotswap(hotswap);
+    crate::compiler_builtins::register_functions_for_jit(&mut jit_builder);
+    jit_builder.symbol_lookup_fn(dep_symbol_lookup_fn(tcx.sess, crate_info));
+    jit_builder.symbol("__clif_jit_fn", clif_jit_fn as *const u8);
+    let mut jit_module = JITModule::new(jit_builder);
+
+    let mut cx = crate::CodegenCx::new(
+        tcx,
+        backend_config.clone(),
+        jit_module.isa(),
+        false,
+        Symbol::intern("dummy_cgu_name"),
+    );
+
+    crate::allocator::codegen(tcx, &mut jit_module, &mut cx.unwind_context);
+
+    (jit_module, cx)
+}
+
+pub(crate) fn run_jit(tcx: TyCtxt<'_>, backend_config: BackendConfig) -> ! {
+    if !tcx.sess.opts.output_types.should_codegen() {
+        tcx.dcx().fatal("JIT mode doesn't work with `cargo check`");
+    }
+
+    if !tcx.crate_types().contains(&rustc_session::config::CrateType::Executable) {
+        tcx.dcx().fatal("can't jit non-executable crate");
+    }
+
+    let (mut jit_module, mut cx) = create_jit_module(
+        tcx,
+        &backend_config,
+        matches!(backend_config.codegen_mode, CodegenMode::JitLazy),
+    );
+    let mut cached_context = Context::new();
+
+    let (_, cgus) = tcx.collect_and_partition_mono_items(());
+    let mono_items = cgus
+        .iter()
+        .flat_map(|cgu| cgu.items_in_deterministic_order(tcx))
+        .collect::<FxHashMap<_, _>>()
+        .into_iter()
+        .collect::<Vec<(_, _)>>();
+
+    tcx.sess.time("codegen mono items", || {
+        super::predefine_mono_items(tcx, &mut jit_module, &mono_items);
+        for (mono_item, _) in mono_items {
+            match mono_item {
+                MonoItem::Fn(inst) => match backend_config.codegen_mode {
+                    CodegenMode::Aot => unreachable!(),
+                    CodegenMode::Jit => {
+                        codegen_and_compile_fn(
+                            tcx,
+                            &mut cx,
+                            &mut cached_context,
+                            &mut jit_module,
+                            inst,
+                        );
+                    }
+                    CodegenMode::JitLazy => {
+                        codegen_shim(tcx, &mut cx, &mut cached_context, &mut jit_module, inst)
+                    }
+                },
+                MonoItem::Static(def_id) => {
+                    crate::constant::codegen_static(tcx, &mut jit_module, def_id);
+                }
+                MonoItem::GlobalAsm(item_id) => {
+                    let item = tcx.hir().item(item_id);
+                    tcx.dcx().span_fatal(item.span, "Global asm is not supported in JIT mode");
+                }
+            }
+        }
+    });
+
+    if !cx.global_asm.is_empty() {
+        tcx.dcx().fatal("Inline asm is not supported in JIT mode");
+    }
+
+    crate::main_shim::maybe_create_entry_wrapper(
+        tcx,
+        &mut jit_module,
+        &mut cx.unwind_context,
+        true,
+        true,
+    );
+
+    tcx.dcx().abort_if_errors();
+
+    jit_module.finalize_definitions().unwrap();
+    unsafe { cx.unwind_context.register_jit(&jit_module) };
+
+    println!(
+        "Rustc codegen cranelift will JIT run the executable, because -Cllvm-args=mode=jit was passed"
+    );
+
+    let args = std::iter::once(&*tcx.crate_name(LOCAL_CRATE).as_str().to_string())
+        .chain(backend_config.jit_args.iter().map(|arg| &**arg))
+        .map(|arg| CString::new(arg).unwrap())
+        .collect::<Vec<_>>();
+
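+    // Look up the `main` wrapper generated by `maybe_create_entry_wrapper`
+    // above. It follows the C `main(argc, argv)` convention; all values are
+    // declared pointer-sized here.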
+    let start_sig = Signature {
+        params: vec![
+            AbiParam::new(jit_module.target_config().pointer_type()),
+            AbiParam::new(jit_module.target_config().pointer_type()),
+        ],
+        returns: vec![AbiParam::new(jit_module.target_config().pointer_type() /*isize*/)],
+        call_conv: jit_module.target_config().default_call_conv,
+    };
+    let start_func_id = jit_module.declare_function("main", Linkage::Import, &start_sig).unwrap();
+    let finalized_start: *const u8 = jit_module.get_finalized_function(start_func_id);
+
+    LAZY_JIT_STATE.with(|lazy_jit_state| {
+        let mut lazy_jit_state = lazy_jit_state.borrow_mut();
+        assert!(lazy_jit_state.is_none());
+        *lazy_jit_state = Some(JitState { backend_config, jit_module });
+    });
+
+    let f: extern "C" fn(c_int, *const *const c_char) -> c_int =
+        unsafe { ::std::mem::transmute(finalized_start) };
+
+    let (tx, rx) = mpsc::channel();
+    GLOBAL_MESSAGE_SENDER.set(Mutex::new(tx)).unwrap();
+
+    // Spawn the jitted runtime in a new thread so that this rustc thread can handle messages
+    // (e.g. to lazily JIT further functions as required).
+    std::thread::spawn(move || {
+        let mut argv = args.iter().map(|arg| arg.as_ptr()).collect::<Vec<_>>();
+
+        // Push a null pointer as a terminating argument. This is required by POSIX and
+        // useful because some dynamic linkers use it as an end-of-argv marker.
+        argv.push(std::ptr::null());
+
+        let ret = f(args.len() as c_int, argv.as_ptr());
+        std::process::exit(ret);
+    });
+
+    // Handle messages
+    loop {
+        match rx.recv().unwrap() {
+            // lazy JIT compilation request - compile requested instance and return pointer to result
+            UnsafeMessage::JitFn { instance_ptr, trampoline_ptr, tx } => {
+                tx.send(jit_fn(instance_ptr, trampoline_ptr))
+                    .expect("jitted runtime hung up before response to lazy JIT request was sent");
+            }
+        }
+    }
+}
+
+pub(crate) fn codegen_and_compile_fn<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    cx: &mut crate::CodegenCx,
+    cached_context: &mut Context,
+    module: &mut dyn Module,
+    instance: Instance<'tcx>,
+) {
+    cranelift_codegen::timing::set_thread_profiler(Box::new(super::MeasuremeProfiler(
+        cx.profiler.clone(),
+    )));
+
+    tcx.prof.generic_activity("codegen and compile fn").run(|| {
+        let _inst_guard =
+            crate::PrintOnPanic(|| format!("{:?} {}", instance, tcx.symbol_name(instance).name));
+
+        let cached_func = std::mem::replace(&mut cached_context.func, Function::new());
+        if let Some(codegened_func) = crate::base::codegen_fn(
+            tcx,
+            cx,
+            &mut TypeDebugContext::default(),
+            cached_func,
+            module,
+            instance,
+        ) {
+            crate::base::compile_fn(cx, cached_context, module, codegened_func);
+        }
+    });
+}
+
+extern "C" fn clif_jit_fn(
+    instance_ptr: *const Instance<'static>,
+    trampoline_ptr: *const u8,
+) -> *const u8 {
+    // send the JIT request to the rustc thread, with a channel for the response
+    let (tx, rx) = mpsc::channel();
+    UnsafeMessage::JitFn { instance_ptr, trampoline_ptr, tx }
+        .send()
+        .expect("rustc thread hung up before lazy JIT request was sent");
+
+    // block on JIT compilation result
+    rx.recv().expect("rustc thread hung up before responding to sent lazy JIT request")
+}
+
+fn jit_fn(instance_ptr: *const Instance<'static>, trampoline_ptr: *const u8) -> *const u8 {
+    rustc_middle::ty::tls::with(|tcx| {
+        // lift is used to ensure the correct lifetime for instance.
+        let instance = tcx.lift(unsafe { *instance_ptr }).unwrap();
+
+        LAZY_JIT_STATE.with(|lazy_jit_state| {
+            let mut lazy_jit_state = lazy_jit_state.borrow_mut();
+            let lazy_jit_state = lazy_jit_state.as_mut().unwrap();
+            let jit_module = &mut lazy_jit_state.jit_module;
+            let backend_config = lazy_jit_state.backend_config.clone();
+
+            let name = tcx.symbol_name(instance).name;
+            let sig = crate::abi::get_function_sig(
+                tcx,
+                jit_module.target_config().default_call_conv,
+                instance,
+            );
+            let func_id = jit_module.declare_function(name, Linkage::Export, &sig).unwrap();
+
+            let current_ptr = jit_module.read_got_entry(func_id);
+
+            // If the function's GOT entry has already been updated to point at something other
+            // than the shim trampoline, don't re-jit but just return the new pointer instead.
+            // This does not need synchronization as this code is executed only by a sole rustc
+            // thread.
+            if current_ptr != trampoline_ptr {
+                return current_ptr;
+            }
+
+            jit_module.prepare_for_function_redefine(func_id).unwrap();
+
+            let mut cx = crate::CodegenCx::new(
+                tcx,
+                backend_config,
+                jit_module.isa(),
+                false,
+                Symbol::intern("dummy_cgu_name"),
+            );
+            codegen_and_compile_fn(tcx, &mut cx, &mut Context::new(), jit_module, instance);
+
+            assert!(cx.global_asm.is_empty());
+            jit_module.finalize_definitions().unwrap();
+            unsafe { cx.unwind_context.register_jit(&jit_module) };
+            jit_module.get_finalized_function(func_id)
+        })
+    })
+}
+
+fn dep_symbol_lookup_fn(
+    sess: &Session,
+    crate_info: CrateInfo,
+) -> Box<dyn Fn(&str) -> Option<*const u8>> {
+    use rustc_middle::middle::dependency_format::Linkage;
+
+    let mut dylib_paths = Vec::new();
+
+    let data = &crate_info
+        .dependency_formats
+        .iter()
+        .find(|(crate_type, _data)| *crate_type == rustc_session::config::CrateType::Executable)
+        .unwrap()
+        .1;
+    // `used_crates` is in reverse postorder in terms of dependencies. Reverse the order here to
+    // get a postorder which ensures that all dependencies of a dylib are loaded before the dylib
+    // itself. This helps the dynamic linker to find dylibs not in the regular dynamic library
+    // search path.
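+    // E.g. for (hypothetical) crates `app -> libA -> libB`, `used_crates` is
+    // `[libA, libB]` and the reversed `[libB, libA]` loads `libB` before the
+    // `libA` that depends on it.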
+    for &cnum in crate_info.used_crates.iter().rev() {
+        let src = &crate_info.used_crate_source[&cnum];
+        match data[cnum.as_usize() - 1] {
+            Linkage::NotLinked | Linkage::IncludedFromDylib => {}
+            Linkage::Static => {
+                let name = crate_info.crate_name[&cnum];
+                let mut diag = sess.dcx().struct_err(format!("Can't load static lib {}", name));
+                diag.note("rustc_codegen_cranelift can only load dylibs in JIT mode.");
+                diag.emit();
+            }
+            Linkage::Dynamic => {
+                dylib_paths.push(src.dylib.as_ref().unwrap().0.clone());
+            }
+        }
+    }
+
+    let imported_dylibs = Box::leak(
+        dylib_paths
+            .into_iter()
+            .map(|path| unsafe { libloading::Library::new(&path).unwrap() })
+            .collect::<Box<[_]>>(),
+    );
+
+    sess.dcx().abort_if_errors();
+
+    Box::new(move |sym_name| {
+        for dylib in &*imported_dylibs {
+            if let Ok(sym) = unsafe { dylib.get::<*const u8>(sym_name.as_bytes()) } {
+                return Some(*sym);
+            }
+        }
+        None
+    })
+}
+
+fn codegen_shim<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    cx: &mut CodegenCx,
+    cached_context: &mut Context,
+    module: &mut JITModule,
+    inst: Instance<'tcx>,
+) {
+    let pointer_type = module.target_config().pointer_type();
+
+    let name = tcx.symbol_name(inst).name;
+    let sig = crate::abi::get_function_sig(tcx, module.target_config().default_call_conv, inst);
+    let func_id = module.declare_function(name, Linkage::Export, &sig).unwrap();
+
+    let instance_ptr = Box::into_raw(Box::new(inst));
+
+    let jit_fn = module
+        .declare_function(
+            "__clif_jit_fn",
+            Linkage::Import,
+            &Signature {
+                call_conv: module.target_config().default_call_conv,
+                params: vec![AbiParam::new(pointer_type), AbiParam::new(pointer_type)],
+                returns: vec![AbiParam::new(pointer_type)],
+            },
+        )
+        .unwrap();
+
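+    // Build a trampoline roughly equivalent to:
+    //
+    //     fn trampoline(args...) -> ret {
+    //         let jitted_fn = __clif_jit_fn(instance_ptr, trampoline_ptr);
+    //         jitted_fn(args...)
+    //     }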
+    let context = cached_context;
+    context.clear();
+    let trampoline = &mut context.func;
+    trampoline.signature = sig.clone();
+
+    let mut builder_ctx = FunctionBuilderContext::new();
+    let mut trampoline_builder = FunctionBuilder::new(trampoline, &mut builder_ctx);
+
+    let trampoline_fn = module.declare_func_in_func(func_id, trampoline_builder.func);
+    let jit_fn = module.declare_func_in_func(jit_fn, trampoline_builder.func);
+    let sig_ref = trampoline_builder.func.import_signature(sig);
+
+    let entry_block = trampoline_builder.create_block();
+    trampoline_builder.append_block_params_for_function_params(entry_block);
+    let fn_args = trampoline_builder.func.dfg.block_params(entry_block).to_vec();
+
+    trampoline_builder.switch_to_block(entry_block);
+    let instance_ptr = trampoline_builder.ins().iconst(pointer_type, instance_ptr as u64 as i64);
+    let trampoline_ptr = trampoline_builder.ins().func_addr(pointer_type, trampoline_fn);
+    let jitted_fn = trampoline_builder.ins().call(jit_fn, &[instance_ptr, trampoline_ptr]);
+    let jitted_fn = trampoline_builder.func.dfg.inst_results(jitted_fn)[0];
+    let call_inst = trampoline_builder.ins().call_indirect(sig_ref, jitted_fn, &fn_args);
+    let ret_vals = trampoline_builder.func.dfg.inst_results(call_inst).to_vec();
+    trampoline_builder.ins().return_(&ret_vals);
+
+    module.define_function(func_id, context).unwrap();
+    cx.unwind_context.add_function(func_id, context, module.isa());
+}
diff --git a/compiler/rustc_codegen_cranelift/src/driver/mod.rs b/compiler/rustc_codegen_cranelift/src/driver/mod.rs
new file mode 100644
index 00000000000..fb0eed07c19
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/driver/mod.rs
@@ -0,0 +1,84 @@
+//! Drivers are responsible for calling [`codegen_fn`] or [`codegen_static`] for each mono item and
+//! performing any further actions like JIT executing or writing object files.
+//!
+//! [`codegen_fn`]: crate::base::codegen_fn
+//! [`codegen_static`]: crate::constant::codegen_static
+
+use rustc_data_structures::profiling::SelfProfilerRef;
+use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
+use rustc_middle::mir::mono::{MonoItem, MonoItemData};
+
+use crate::prelude::*;
+
+pub(crate) mod aot;
+#[cfg(feature = "jit")]
+pub(crate) mod jit;
+
+fn predefine_mono_items<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    module: &mut dyn Module,
+    mono_items: &[(MonoItem<'tcx>, MonoItemData)],
+) {
+    tcx.prof.generic_activity("predefine functions").run(|| {
+        let is_compiler_builtins = tcx.is_compiler_builtins(LOCAL_CRATE);
+        for &(mono_item, data) in mono_items {
+            match mono_item {
+                MonoItem::Fn(instance) => {
+                    let name = tcx.symbol_name(instance).name;
+                    let _inst_guard = crate::PrintOnPanic(|| format!("{:?} {}", instance, name));
+                    let sig =
+                        get_function_sig(tcx, module.target_config().default_call_conv, instance);
+                    let linkage = crate::linkage::get_clif_linkage(
+                        mono_item,
+                        data.linkage,
+                        data.visibility,
+                        is_compiler_builtins,
+                    );
+                    let is_naked = tcx
+                        .codegen_fn_attrs(instance.def_id())
+                        .flags
+                        .contains(CodegenFnAttrFlags::NAKED);
+                    module
+                        .declare_function(
+                            name,
+                            // Naked functions are defined in a separate object
+                            // file from the codegen unit rustc expects them to
+                            // be defined in.
+                            if is_naked { Linkage::Import } else { linkage },
+                            &sig,
+                        )
+                        .unwrap();
+                }
+                MonoItem::Static(_) | MonoItem::GlobalAsm(_) => {}
+            }
+        }
+    });
+}
+
+struct MeasuremeProfiler(SelfProfilerRef);
+
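+// Keeps a clone of the profiler alive for as long as the `inner` guard that
+// borrows it. `Drop` below ends the activity before manually dropping the
+// profiler it borrows from.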
+struct TimingGuard {
+    profiler: std::mem::ManuallyDrop<SelfProfilerRef>,
+    inner: Option<rustc_data_structures::profiling::TimingGuard<'static>>,
+}
+
+impl Drop for TimingGuard {
+    fn drop(&mut self) {
+        self.inner.take();
+        unsafe {
+            std::mem::ManuallyDrop::drop(&mut self.profiler);
+        }
+    }
+}
+
+impl cranelift_codegen::timing::Profiler for MeasuremeProfiler {
+    fn start_pass(&self, pass: cranelift_codegen::timing::Pass) -> Box<dyn std::any::Any> {
+        let mut timing_guard =
+            TimingGuard { profiler: std::mem::ManuallyDrop::new(self.0.clone()), inner: None };
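+        // SAFETY: the faked `'static` borrow of the profiler never outlives
+        // it, as `TimingGuard::drop` drops `inner` before manually dropping
+        // `profiler`.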
+        timing_guard.inner = Some(
+            unsafe { &*(&*timing_guard.profiler as &SelfProfilerRef as *const SelfProfilerRef) }
+                .generic_activity(pass.description()),
+        );
+        Box::new(timing_guard)
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/global_asm.rs b/compiler/rustc_codegen_cranelift/src/global_asm.rs
new file mode 100644
index 00000000000..0c99a5ce12f
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/global_asm.rs
@@ -0,0 +1,225 @@
+//! Compilation of `global_asm!` items to object files, which are then linked together with the
+//! object files produced by the AOT driver.
+
+use std::io::Write;
+use std::path::PathBuf;
+use std::process::{Command, Stdio};
+use std::sync::Arc;
+
+use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
+use rustc_hir::{InlineAsmOperand, ItemId};
+use rustc_middle::mir::interpret::ErrorHandled;
+use rustc_session::config::{OutputFilenames, OutputType};
+use rustc_target::asm::InlineAsmArch;
+
+use crate::prelude::*;
+
+pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, item_id: ItemId) {
+    let item = tcx.hir().item(item_id);
+    if let rustc_hir::ItemKind::GlobalAsm(asm) = item.kind {
+        let is_x86 =
+            matches!(tcx.sess.asm_arch.unwrap(), InlineAsmArch::X86 | InlineAsmArch::X86_64);
+
+        if is_x86 {
+            if !asm.options.contains(InlineAsmOptions::ATT_SYNTAX) {
+                global_asm.push_str("\n.intel_syntax noprefix\n");
+            } else {
+                global_asm.push_str("\n.att_syntax\n");
+            }
+        }
+        for piece in asm.template {
+            match *piece {
+                InlineAsmTemplatePiece::String(ref s) => global_asm.push_str(s),
+                InlineAsmTemplatePiece::Placeholder { operand_idx, modifier: _, span: op_sp } => {
+                    match asm.operands[operand_idx].0 {
+                        InlineAsmOperand::Const { ref anon_const } => {
+                            match tcx.const_eval_poly(anon_const.def_id.to_def_id()) {
+                                Ok(const_value) => {
+                                    let ty = tcx
+                                        .typeck_body(anon_const.body)
+                                        .node_type(anon_const.hir_id);
+                                    let string = rustc_codegen_ssa::common::asm_const_to_str(
+                                        tcx,
+                                        op_sp,
+                                        const_value,
+                                        RevealAllLayoutCx(tcx).layout_of(ty),
+                                    );
+                                    global_asm.push_str(&string);
+                                }
+                                Err(ErrorHandled::Reported { .. }) => {
+                                    // An error has already been reported and compilation is
+                                    // guaranteed to fail if execution hits this path.
+                                }
+                                Err(ErrorHandled::TooGeneric(_)) => {
+                                    span_bug!(op_sp, "asm const cannot be resolved; too generic");
+                                }
+                            }
+                        }
+                        InlineAsmOperand::SymFn { anon_const } => {
+                            if cfg!(not(feature = "inline_asm_sym")) {
+                                tcx.dcx().span_err(
+                                    item.span,
+                                    "asm! and global_asm! sym operands are not yet supported",
+                                );
+                            }
+
+                            let ty = tcx.typeck_body(anon_const.body).node_type(anon_const.hir_id);
+                            let instance = match ty.kind() {
+                                &ty::FnDef(def_id, args) => Instance::new(def_id, args),
+                                _ => span_bug!(op_sp, "asm sym is not a function"),
+                            };
+                            let symbol = tcx.symbol_name(instance);
+                            // FIXME handle the case where the function was made private to the
+                            // current codegen unit
+                            global_asm.push_str(symbol.name);
+                        }
+                        InlineAsmOperand::SymStatic { path: _, def_id } => {
+                            if cfg!(not(feature = "inline_asm_sym")) {
+                                tcx.dcx().span_err(
+                                    item.span,
+                                    "asm! and global_asm! sym operands are not yet supported",
+                                );
+                            }
+
+                            let instance = Instance::mono(tcx, def_id);
+                            let symbol = tcx.symbol_name(instance);
+                            global_asm.push_str(symbol.name);
+                        }
+                        InlineAsmOperand::In { .. }
+                        | InlineAsmOperand::Out { .. }
+                        | InlineAsmOperand::InOut { .. }
+                        | InlineAsmOperand::SplitInOut { .. }
+                        | InlineAsmOperand::Label { .. } => {
+                            span_bug!(op_sp, "invalid operand type for global_asm!")
+                        }
+                    }
+                }
+            }
+        }
+
+        global_asm.push('\n');
+        if is_x86 {
+            global_asm.push_str(".att_syntax\n\n");
+        }
+    } else {
+        bug!("Expected GlobalAsm found {:?}", item);
+    }
+}
+
+#[derive(Debug)]
+pub(crate) struct GlobalAsmConfig {
+    assembler: PathBuf,
+    target: String,
+    pub(crate) output_filenames: Arc<OutputFilenames>,
+}
+
+impl GlobalAsmConfig {
+    pub(crate) fn new(tcx: TyCtxt<'_>) -> Self {
+        GlobalAsmConfig {
+            assembler: crate::toolchain::get_toolchain_binary(tcx.sess, "as"),
+            target: match &tcx.sess.opts.target_triple {
+                rustc_target::spec::TargetTriple::TargetTriple(triple) => triple.clone(),
+                rustc_target::spec::TargetTriple::TargetJson { path_for_rustdoc, .. } => {
+                    path_for_rustdoc.to_str().unwrap().to_owned()
+                }
+            },
+            output_filenames: tcx.output_filenames(()).clone(),
+        }
+    }
+}
+
+pub(crate) fn compile_global_asm(
+    config: &GlobalAsmConfig,
+    cgu_name: &str,
+    global_asm: &str,
+) -> Result<Option<PathBuf>, String> {
+    if global_asm.is_empty() {
+        return Ok(None);
+    }
+
+    // Remove all LLVM-style comments (everything from `//` to the end of a line)
+    let mut global_asm = global_asm
+        .lines()
+        .map(|line| if let Some(index) = line.find("//") { &line[0..index] } else { line })
+        .collect::<Vec<_>>()
+        .join("\n");
+    global_asm.push('\n');
+
+    let global_asm_object_file = add_file_stem_postfix(
+        config.output_filenames.temp_path(OutputType::Object, Some(cgu_name)),
+        ".asm",
+    );
+
+    // Assemble `global_asm`
+    if option_env!("CG_CLIF_FORCE_GNU_AS").is_some() {
+        let mut child = Command::new(&config.assembler)
+            .arg("-o")
+            .arg(&global_asm_object_file)
+            .stdin(Stdio::piped())
+            .spawn()
+            .expect("Failed to spawn `as`.");
+        child.stdin.take().unwrap().write_all(global_asm.as_bytes()).unwrap();
+        let status = child.wait().expect("Failed to wait for `as`.");
+        if !status.success() {
+            return Err(format!("Failed to assemble `{}`", global_asm));
+        }
+    } else {
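+        // Otherwise, wrap the asm in a minimal #![no_core] crate and feed it to
+        // rustc itself with the LLVM backend, which assembles it through LLVM's
+        // integrated assembler.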
+        let mut child = Command::new(std::env::current_exe().unwrap())
+            // Avoid a warning about the jobserver fd not being passed
+            .env_remove("CARGO_MAKEFLAGS")
+            .arg("--target")
+            .arg(&config.target)
+            .arg("--crate-type")
+            .arg("staticlib")
+            .arg("--emit")
+            .arg("obj")
+            .arg("-o")
+            .arg(&global_asm_object_file)
+            .arg("-")
+            .arg("-Abad_asm_style")
+            .arg("-Zcodegen-backend=llvm")
+            .stdin(Stdio::piped())
+            .spawn()
+            .expect("Failed to spawn `as`.");
+        let mut stdin = child.stdin.take().unwrap();
+        stdin
+            .write_all(
+                br####"
+                #![feature(decl_macro, no_core, rustc_attrs)]
+                #![allow(internal_features)]
+                #![no_core]
+                #[rustc_builtin_macro]
+                #[rustc_macro_transparency = "semitransparent"]
+                macro global_asm() { /* compiler built-in */ }
+                global_asm!(r###"
+                "####,
+            )
+            .unwrap();
+        stdin.write_all(global_asm.as_bytes()).unwrap();
+        stdin
+            .write_all(
+                br####"
+                "###);
+                "####,
+            )
+            .unwrap();
+        std::mem::drop(stdin);
+        let status = child.wait().expect("Failed to wait for rustc.");
+        if !status.success() {
+            return Err(format!("Failed to assemble `{}`", global_asm));
+        }
+    }
+
+    Ok(Some(global_asm_object_file))
+}
+
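+/// Insert a postfix between the file stem and the extension, e.g.
+/// `add_file_stem_postfix(PathBuf::from("foo.o"), ".asm")` yields `foo.asm.o`.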
+pub(crate) fn add_file_stem_postfix(mut path: PathBuf, postfix: &str) -> PathBuf {
+    let mut new_filename = path.file_stem().unwrap().to_owned();
+    new_filename.push(postfix);
+    if let Some(extension) = path.extension() {
+        new_filename.push(".");
+        new_filename.push(extension);
+    }
+    path.set_file_name(new_filename);
+    path
+}
diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
new file mode 100644
index 00000000000..2de804f5e04
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
@@ -0,0 +1,946 @@
+//! Codegen of `asm!` invocations.
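+//!
+//! Cranelift has no native inline assembly support, so `asm!` blocks are
+//! lowered to separate functions appended to the global asm buffer and then
+//! called like ordinary functions.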
+
+use std::fmt::Write;
+
+use cranelift_codegen::isa::CallConv;
+use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
+use rustc_span::sym;
+use rustc_target::asm::*;
+use target_lexicon::BinaryFormat;
+
+use crate::prelude::*;
+
+pub(crate) enum CInlineAsmOperand<'tcx> {
+    In {
+        reg: InlineAsmRegOrRegClass,
+        value: Value,
+    },
+    Out {
+        reg: InlineAsmRegOrRegClass,
+        late: bool,
+        place: Option<CPlace<'tcx>>,
+    },
+    InOut {
+        reg: InlineAsmRegOrRegClass,
+        _late: bool,
+        in_value: Value,
+        out_place: Option<CPlace<'tcx>>,
+    },
+    Const {
+        value: String,
+    },
+    Symbol {
+        symbol: String,
+    },
+}
+
+pub(crate) fn codegen_inline_asm_terminator<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    span: Span,
+    template: &[InlineAsmTemplatePiece],
+    operands: &[InlineAsmOperand<'tcx>],
+    options: InlineAsmOptions,
+    destination: Option<mir::BasicBlock>,
+) {
+    // Used by panic_abort on Windows (`int 0x29` is the Windows fast-fail
+    // interrupt), but written in a syntax that only happens to work with asm!()
+    // by accident; it breaks with the GNU assembler as well as with global_asm!()
+    // for the LLVM backend.
+    if template.len() == 1
+        && template[0] == InlineAsmTemplatePiece::String("int $$0x29".to_string())
+    {
+        fx.bcx.ins().trap(TrapCode::User(1));
+        return;
+    }
+
+    let operands = operands
+        .iter()
+        .map(|operand| match *operand {
+            InlineAsmOperand::In { reg, ref value } => CInlineAsmOperand::In {
+                reg,
+                value: crate::base::codegen_operand(fx, value).load_scalar(fx),
+            },
+            InlineAsmOperand::Out { reg, late, ref place } => CInlineAsmOperand::Out {
+                reg,
+                late,
+                place: place.map(|place| crate::base::codegen_place(fx, place)),
+            },
+            InlineAsmOperand::InOut { reg, late, ref in_value, ref out_place } => {
+                CInlineAsmOperand::InOut {
+                    reg,
+                    _late: late,
+                    in_value: crate::base::codegen_operand(fx, in_value).load_scalar(fx),
+                    out_place: out_place.map(|place| crate::base::codegen_place(fx, place)),
+                }
+            }
+            InlineAsmOperand::Const { ref value } => {
+                let (const_value, ty) = crate::constant::eval_mir_constant(fx, value);
+                let value = rustc_codegen_ssa::common::asm_const_to_str(
+                    fx.tcx,
+                    span,
+                    const_value,
+                    fx.layout_of(ty),
+                );
+                CInlineAsmOperand::Const { value }
+            }
+            InlineAsmOperand::SymFn { ref value } => {
+                if cfg!(not(feature = "inline_asm_sym")) {
+                    fx.tcx
+                        .dcx()
+                        .span_err(span, "asm! and global_asm! sym operands are not yet supported");
+                }
+
+                let const_ = fx.monomorphize(value.const_);
+                if let ty::FnDef(def_id, args) = *const_.ty().kind() {
+                    let instance = ty::Instance::resolve_for_fn_ptr(
+                        fx.tcx,
+                        ty::ParamEnv::reveal_all(),
+                        def_id,
+                        args,
+                    )
+                    .unwrap();
+                    let symbol = fx.tcx.symbol_name(instance);
+
+                    // Pass a wrapper rather than the function itself, as the function
+                    // may not be exported from the main codegen unit and may thus be
+                    // unreachable from the object file created by an external assembler.
+                    let inline_asm_index = fx.cx.inline_asm_index.get();
+                    fx.cx.inline_asm_index.set(inline_asm_index + 1);
+                    let wrapper_name = format!(
+                        "__inline_asm_{}_wrapper_n{}",
+                        fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
+                        inline_asm_index
+                    );
+                    let sig =
+                        get_function_sig(fx.tcx, fx.target_config.default_call_conv, instance);
+                    create_wrapper_function(
+                        fx.module,
+                        &mut fx.cx.unwind_context,
+                        sig,
+                        &wrapper_name,
+                        symbol.name,
+                    );
+
+                    CInlineAsmOperand::Symbol { symbol: wrapper_name }
+                } else {
+                    span_bug!(span, "invalid type for asm sym (fn)");
+                }
+            }
+            InlineAsmOperand::SymStatic { def_id } => {
+                assert!(fx.tcx.is_static(def_id));
+                let instance = Instance::mono(fx.tcx, def_id);
+                CInlineAsmOperand::Symbol { symbol: fx.tcx.symbol_name(instance).name.to_owned() }
+            }
+            InlineAsmOperand::Label { .. } => {
+                span_bug!(span, "asm! label operands are not yet supported");
+            }
+        })
+        .collect::<Vec<_>>();
+
+    codegen_inline_asm_inner(fx, template, &operands, options);
+
+    match destination {
+        Some(destination) => {
+            let destination_block = fx.get_block(destination);
+            fx.bcx.ins().jump(destination_block, &[]);
+        }
+        None => {
+            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+        }
+    }
+}
+
+pub(crate) fn codegen_inline_asm_inner<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    template: &[InlineAsmTemplatePiece],
+    operands: &[CInlineAsmOperand<'tcx>],
+    options: InlineAsmOptions,
+) {
+    // FIXME add .eh_frame unwind info directives
+
+    let mut asm_gen = InlineAssemblyGenerator {
+        tcx: fx.tcx,
+        arch: fx.tcx.sess.asm_arch.unwrap(),
+        enclosing_def_id: fx.instance.def_id(),
+        template,
+        operands,
+        options,
+        registers: Vec::new(),
+        stack_slots_clobber: Vec::new(),
+        stack_slots_input: Vec::new(),
+        stack_slots_output: Vec::new(),
+        stack_slot_size: Size::from_bytes(0),
+        is_naked: false,
+    };
+    asm_gen.allocate_registers();
+    asm_gen.allocate_stack_slots();
+
+    let inline_asm_index = fx.cx.inline_asm_index.get();
+    fx.cx.inline_asm_index.set(inline_asm_index + 1);
+    let asm_name = format!(
+        "__inline_asm_{}_n{}",
+        fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
+        inline_asm_index
+    );
+
+    let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
+    fx.cx.global_asm.push_str(&generated_asm);
+
+    let mut inputs = Vec::new();
+    let mut outputs = Vec::new();
+    for (i, operand) in operands.iter().enumerate() {
+        match operand {
+            CInlineAsmOperand::In { reg: _, value } => {
+                inputs.push((asm_gen.stack_slots_input[i].unwrap(), *value));
+            }
+            CInlineAsmOperand::Out { reg: _, late: _, place } => {
+                if let Some(place) = place {
+                    outputs.push((asm_gen.stack_slots_output[i].unwrap(), *place));
+                }
+            }
+            CInlineAsmOperand::InOut { reg: _, _late: _, in_value, out_place } => {
+                inputs.push((asm_gen.stack_slots_input[i].unwrap(), *in_value));
+                if let Some(out_place) = out_place {
+                    outputs.push((asm_gen.stack_slots_output[i].unwrap(), *out_place));
+                }
+            }
+            CInlineAsmOperand::Const { value: _ } | CInlineAsmOperand::Symbol { symbol: _ } => {}
+        }
+    }
+
+    call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
+}
+
+pub(crate) fn codegen_naked_asm<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    cx: &mut crate::CodegenCx,
+    module: &mut dyn Module,
+    instance: Instance<'tcx>,
+    span: Span,
+    symbol_name: &str,
+    template: &[InlineAsmTemplatePiece],
+    operands: &[InlineAsmOperand<'tcx>],
+    options: InlineAsmOptions,
+) {
+    // FIXME add .eh_frame unwind info directives
+
+    let operands = operands
+        .iter()
+        .map(|operand| match *operand {
+            InlineAsmOperand::In { .. }
+            | InlineAsmOperand::Out { .. }
+            | InlineAsmOperand::InOut { .. } => {
+                span_bug!(span, "invalid operand type for naked asm")
+            }
+            InlineAsmOperand::Const { ref value } => {
+                let cv = instance.instantiate_mir_and_normalize_erasing_regions(
+                    tcx,
+                    ty::ParamEnv::reveal_all(),
+                    ty::EarlyBinder::bind(value.const_),
+                );
+                let const_value = cv
+                    .eval(tcx, ty::ParamEnv::reveal_all(), value.span)
+                    .expect("erroneous constant missed by mono item collection");
+
+                let value = rustc_codegen_ssa::common::asm_const_to_str(
+                    tcx,
+                    span,
+                    const_value,
+                    RevealAllLayoutCx(tcx).layout_of(cv.ty()),
+                );
+                CInlineAsmOperand::Const { value }
+            }
+            InlineAsmOperand::SymFn { ref value } => {
+                if cfg!(not(feature = "inline_asm_sym")) {
+                    tcx.dcx()
+                        .span_err(span, "asm! and global_asm! sym operands are not yet supported");
+                }
+
+                let const_ = instance.instantiate_mir_and_normalize_erasing_regions(
+                    tcx,
+                    ty::ParamEnv::reveal_all(),
+                    ty::EarlyBinder::bind(value.const_),
+                );
+                if let ty::FnDef(def_id, args) = *const_.ty().kind() {
+                    let instance = ty::Instance::resolve_for_fn_ptr(
+                        tcx,
+                        ty::ParamEnv::reveal_all(),
+                        def_id,
+                        args,
+                    )
+                    .unwrap();
+                    let symbol = tcx.symbol_name(instance);
+
+                    // Pass a wrapper rather than the function itself, as the function
+                    // may not be exported from the main codegen unit and may thus be
+                    // unreachable from the object file created by an external assembler.
+                    let inline_asm_index = cx.inline_asm_index.get();
+                    cx.inline_asm_index.set(inline_asm_index + 1);
+                    let wrapper_name = format!(
+                        "__inline_asm_{}_wrapper_n{}",
+                        cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
+                        inline_asm_index
+                    );
+                    let sig =
+                        get_function_sig(tcx, module.target_config().default_call_conv, instance);
+                    create_wrapper_function(
+                        module,
+                        &mut cx.unwind_context,
+                        sig,
+                        &wrapper_name,
+                        symbol.name,
+                    );
+
+                    CInlineAsmOperand::Symbol { symbol: wrapper_name }
+                } else {
+                    span_bug!(span, "invalid type for asm sym (fn)");
+                }
+            }
+            InlineAsmOperand::SymStatic { def_id } => {
+                assert!(tcx.is_static(def_id));
+                let instance = Instance::mono(tcx, def_id);
+                CInlineAsmOperand::Symbol { symbol: tcx.symbol_name(instance).name.to_owned() }
+            }
+            InlineAsmOperand::Label { .. } => {
+                span_bug!(span, "asm! label operands are not yet supported");
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let asm_gen = InlineAssemblyGenerator {
+        tcx,
+        arch: tcx.sess.asm_arch.unwrap(),
+        enclosing_def_id: instance.def_id(),
+        template,
+        operands: &operands,
+        options,
+        registers: Vec::new(),
+        stack_slots_clobber: Vec::new(),
+        stack_slots_input: Vec::new(),
+        stack_slots_output: Vec::new(),
+        stack_slot_size: Size::from_bytes(0),
+        is_naked: true,
+    };
+
+    let generated_asm = asm_gen.generate_asm_wrapper(symbol_name);
+    cx.global_asm.push_str(&generated_asm);
+}
+
+struct InlineAssemblyGenerator<'a, 'tcx> {
+    tcx: TyCtxt<'tcx>,
+    arch: InlineAsmArch,
+    enclosing_def_id: DefId,
+    template: &'a [InlineAsmTemplatePiece],
+    operands: &'a [CInlineAsmOperand<'tcx>],
+    options: InlineAsmOptions,
+    registers: Vec<Option<InlineAsmReg>>,
+    stack_slots_clobber: Vec<Option<Size>>,
+    stack_slots_input: Vec<Option<Size>>,
+    stack_slots_output: Vec<Option<Size>>,
+    stack_slot_size: Size,
+    is_naked: bool,
+}
+
+impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
+    fn allocate_registers(&mut self) {
+        assert!(!self.is_naked);
+
+        let sess = self.tcx.sess;
+        let map = allocatable_registers(
+            self.arch,
+            sess.relocation_model(),
+            self.tcx.asm_target_features(self.enclosing_def_id),
+            &sess.target,
+        );
+        let mut allocated = FxHashMap::<_, (bool, bool)>::default();
+        let mut regs = vec![None; self.operands.len()];
+
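+        // Registers are assigned in three passes: explicit registers first, then
+        // register classes for non-late out and all inout operands (the most
+        // constrained), and finally register classes for in and lateout operands.
+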
+        // Add explicit registers to the allocated set.
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                CInlineAsmOperand::In { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
+                    regs[i] = Some(reg);
+                    allocated.entry(reg).or_default().0 = true;
+                }
+                CInlineAsmOperand::Out {
+                    reg: InlineAsmRegOrRegClass::Reg(reg),
+                    late: true,
+                    ..
+                } => {
+                    regs[i] = Some(reg);
+                    allocated.entry(reg).or_default().1 = true;
+                }
+                CInlineAsmOperand::Out { reg: InlineAsmRegOrRegClass::Reg(reg), .. }
+                | CInlineAsmOperand::InOut { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
+                    regs[i] = Some(reg);
+                    allocated.insert(reg, (true, true));
+                }
+                _ => (),
+            }
+        }
+
+        // Allocate out/inout/inlateout registers first because they are more constrained.
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                CInlineAsmOperand::Out {
+                    reg: InlineAsmRegOrRegClass::RegClass(class),
+                    late: false,
+                    ..
+                }
+                | CInlineAsmOperand::InOut {
+                    reg: InlineAsmRegOrRegClass::RegClass(class), ..
+                } => {
+                    let mut alloc_reg = None;
+                    for &reg in &map[&class] {
+                        let mut used = false;
+                        reg.overlapping_regs(|r| {
+                            if allocated.contains_key(&r) {
+                                used = true;
+                            }
+                        });
+
+                        if !used {
+                            alloc_reg = Some(reg);
+                            break;
+                        }
+                    }
+
+                    let reg = alloc_reg.expect("cannot allocate registers");
+                    regs[i] = Some(reg);
+                    allocated.insert(reg, (true, true));
+                }
+                _ => (),
+            }
+        }
+
+        // Allocate in/lateout.
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                CInlineAsmOperand::In { reg: InlineAsmRegOrRegClass::RegClass(class), .. } => {
+                    let mut alloc_reg = None;
+                    for &reg in &map[&class] {
+                        let mut used = false;
+                        reg.overlapping_regs(|r| {
+                            if allocated.get(&r).copied().unwrap_or_default().0 {
+                                used = true;
+                            }
+                        });
+
+                        if !used {
+                            alloc_reg = Some(reg);
+                            break;
+                        }
+                    }
+
+                    let reg = alloc_reg.expect("cannot allocate registers");
+                    regs[i] = Some(reg);
+                    allocated.entry(reg).or_default().0 = true;
+                }
+                CInlineAsmOperand::Out {
+                    reg: InlineAsmRegOrRegClass::RegClass(class),
+                    late: true,
+                    ..
+                } => {
+                    let mut alloc_reg = None;
+                    for &reg in &map[&class] {
+                        let mut used = false;
+                        reg.overlapping_regs(|r| {
+                            if allocated.get(&r).copied().unwrap_or_default().1 {
+                                used = true;
+                            }
+                        });
+
+                        if !used {
+                            alloc_reg = Some(reg);
+                            break;
+                        }
+                    }
+
+                    let reg = alloc_reg.expect("cannot allocate registers");
+                    regs[i] = Some(reg);
+                    allocated.entry(reg).or_default().1 = true;
+                }
+                _ => (),
+            }
+        }
+
+        self.registers = regs;
+    }
+
+    fn allocate_stack_slots(&mut self) {
+        assert!(!self.is_naked);
+
+        let mut slot_size = Size::from_bytes(0);
+        let mut slots_clobber = vec![None; self.operands.len()];
+        let mut slots_input = vec![None; self.operands.len()];
+        let mut slots_output = vec![None; self.operands.len()];
+
+        let new_slot_fn = |slot_size: &mut Size, reg_class: InlineAsmRegClass| {
+            let reg_size =
+                reg_class.supported_types(self.arch).iter().map(|(ty, _)| ty.size()).max().unwrap();
+            let align = rustc_target::abi::Align::from_bytes(reg_size.bytes()).unwrap();
+            let offset = slot_size.align_to(align);
+            *slot_size = offset + reg_size;
+            offset
+        };
+        let mut new_slot = |x| new_slot_fn(&mut slot_size, x);
+
+        // Allocate stack slots for saving clobbered registers
+        let abi_clobber = InlineAsmClobberAbi::parse(self.arch, &self.tcx.sess.target, sym::C)
+            .unwrap()
+            .clobbered_regs();
+        for (i, reg) in self.registers.iter().enumerate().filter_map(|(i, r)| r.map(|r| (i, r))) {
+            let mut need_save = true;
+            // If the register overlaps with a register clobbered by a function call,
+            // then we don't need to save it.
+            for r in abi_clobber {
+                r.overlapping_regs(|r| {
+                    if r == reg {
+                        need_save = false;
+                    }
+                });
+
+                if !need_save {
+                    break;
+                }
+            }
+
+            if need_save {
+                slots_clobber[i] = Some(new_slot(reg.reg_class()));
+            }
+        }
+
+        // Allocate stack slots for inout
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                CInlineAsmOperand::InOut { reg, out_place: Some(_), .. } => {
+                    let slot = new_slot(reg.reg_class());
+                    slots_input[i] = Some(slot);
+                    slots_output[i] = Some(slot);
+                }
+                _ => (),
+            }
+        }
+
+        let slot_size_before_input = slot_size;
+        let mut new_slot = |x| new_slot_fn(&mut slot_size, x);
+
+        // Allocate stack slots for input
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                CInlineAsmOperand::In { reg, .. }
+                | CInlineAsmOperand::InOut { reg, out_place: None, .. } => {
+                    slots_input[i] = Some(new_slot(reg.reg_class()));
+                }
+                _ => (),
+            }
+        }
+
+        // Reset the slot size to its value from before the input allocation so that
+        // input and output operands can overlap and save some memory.
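+        // For example, a lone 8-byte input and a lone 8-byte output can share the
+        // same offset: all inputs are loaded into registers before the user asm
+        // runs, and outputs are only stored back afterwards.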
+        let slot_size_after_input = slot_size;
+        slot_size = slot_size_before_input;
+        let mut new_slot = |x| new_slot_fn(&mut slot_size, x);
+
+        // Allocate stack slots for output
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                CInlineAsmOperand::Out { reg, place: Some(_), .. } => {
+                    slots_output[i] = Some(new_slot(reg.reg_class()));
+                }
+                _ => (),
+            }
+        }
+
+        slot_size = slot_size.max(slot_size_after_input);
+
+        self.stack_slots_clobber = slots_clobber;
+        self.stack_slots_input = slots_input;
+        self.stack_slots_output = slots_output;
+        self.stack_slot_size = slot_size;
+    }
+
+    fn generate_asm_wrapper(&self, asm_name: &str) -> String {
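+        // The emitted wrapper has the following shape (unless naked):
+        //   symbol directives for the target binary format
+        //   prologue: save a callee-saved register and move the slot pointer into it
+        //   save clobbered registers into the stack slot
+        //   load input registers from the stack slot
+        //   the user-provided asm template
+        //   store output registers into the stack slot
+        //   restore clobbered registers
+        //   epilogue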
+        let binary_format = crate::target_triple(self.tcx.sess).binary_format;
+
+        let mut generated_asm = String::new();
+        match binary_format {
+            BinaryFormat::Elf => {
+                writeln!(generated_asm, ".globl {}", asm_name).unwrap();
+                writeln!(generated_asm, ".type {},@function", asm_name).unwrap();
+                writeln!(generated_asm, ".section .text.{},\"ax\",@progbits", asm_name).unwrap();
+                writeln!(generated_asm, "{}:", asm_name).unwrap();
+            }
+            BinaryFormat::Macho => {
+                writeln!(generated_asm, ".globl _{}", asm_name).unwrap();
+                writeln!(generated_asm, "_{}:", asm_name).unwrap();
+            }
+            BinaryFormat::Coff => {
+                writeln!(generated_asm, ".globl {}", asm_name).unwrap();
+                writeln!(generated_asm, "{}:", asm_name).unwrap();
+            }
+            _ => self
+                .tcx
+                .dcx()
+                .fatal(format!("Unsupported binary format for inline asm: {binary_format:?}")),
+        }
+
+        let is_x86 = matches!(self.arch, InlineAsmArch::X86 | InlineAsmArch::X86_64);
+
+        if is_x86 {
+            generated_asm.push_str(".intel_syntax noprefix\n");
+        }
+        if !self.is_naked {
+            Self::prologue(&mut generated_asm, self.arch);
+
+            // Save clobbered registers
+            if !self.options.contains(InlineAsmOptions::NORETURN) {
+                for (reg, slot) in self
+                    .registers
+                    .iter()
+                    .zip(self.stack_slots_clobber.iter().copied())
+                    .filter_map(|(r, s)| r.zip(s))
+                {
+                    Self::save_register(&mut generated_asm, self.arch, reg, slot);
+                }
+            }
+
+            // Write input registers
+            for (reg, slot) in self
+                .registers
+                .iter()
+                .zip(self.stack_slots_input.iter().copied())
+                .filter_map(|(r, s)| r.zip(s))
+            {
+                Self::restore_register(&mut generated_asm, self.arch, reg, slot);
+            }
+        }
+
+        if is_x86 && self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
+            generated_asm.push_str(".att_syntax\n");
+        }
+
+        // The actual inline asm
+        for piece in self.template {
+            match piece {
+                InlineAsmTemplatePiece::String(s) => {
+                    generated_asm.push_str(s);
+                }
+                InlineAsmTemplatePiece::Placeholder { operand_idx, modifier, span: _ } => {
+                    match self.operands[*operand_idx] {
+                        CInlineAsmOperand::In { .. }
+                        | CInlineAsmOperand::Out { .. }
+                        | CInlineAsmOperand::InOut { .. } => {
+                            if self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
+                                generated_asm.push('%');
+                            }
+
+                            let reg = self.registers[*operand_idx].unwrap();
+                            match self.arch {
+                                InlineAsmArch::X86_64 => match reg {
+                                    InlineAsmReg::X86(reg)
+                                        if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+                                            && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+                                    {
+                                        // rustc emits x0 rather than xmm0
+                                        let class = match *modifier {
+                                            None | Some('x') => "xmm",
+                                            Some('y') => "ymm",
+                                            Some('z') => "zmm",
+                                            _ => unreachable!(),
+                                        };
+                                        write!(
+                                            generated_asm,
+                                            "{class}{}",
+                                            reg as u32 - X86InlineAsmReg::xmm0 as u32
+                                        )
+                                        .unwrap();
+                                    }
+                                    _ => reg
+                                        .emit(&mut generated_asm, InlineAsmArch::X86_64, *modifier)
+                                        .unwrap(),
+                                },
+                                _ => reg.emit(&mut generated_asm, self.arch, *modifier).unwrap(),
+                            }
+                        }
+                        CInlineAsmOperand::Const { ref value } => {
+                            generated_asm.push_str(value);
+                        }
+                        CInlineAsmOperand::Symbol { ref symbol } => generated_asm.push_str(symbol),
+                    }
+                }
+            }
+        }
+        generated_asm.push('\n');
+
+        if is_x86 && self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
+            generated_asm.push_str(".intel_syntax noprefix\n");
+        }
+
+        if !self.is_naked {
+            if !self.options.contains(InlineAsmOptions::NORETURN) {
+                // Read output registers
+                for (reg, slot) in self
+                    .registers
+                    .iter()
+                    .zip(self.stack_slots_output.iter().copied())
+                    .filter_map(|(r, s)| r.zip(s))
+                {
+                    Self::save_register(&mut generated_asm, self.arch, reg, slot);
+                }
+
+                // Restore clobbered registers
+                for (reg, slot) in self
+                    .registers
+                    .iter()
+                    .zip(self.stack_slots_clobber.iter().copied())
+                    .filter_map(|(r, s)| r.zip(s))
+                {
+                    Self::restore_register(&mut generated_asm, self.arch, reg, slot);
+                }
+
+                Self::epilogue(&mut generated_asm, self.arch);
+            } else {
+                Self::epilogue_noreturn(&mut generated_asm, self.arch);
+            }
+        }
+
+        if is_x86 {
+            generated_asm.push_str(".att_syntax\n");
+        }
+
+        match binary_format {
+            BinaryFormat::Elf => {
+                writeln!(generated_asm, ".size {name}, .-{name}", name = asm_name).unwrap();
+                generated_asm.push_str(".text\n");
+            }
+            BinaryFormat::Macho | BinaryFormat::Coff => {}
+            _ => self
+                .tcx
+                .dcx()
+                .fatal(format!("Unsupported binary format for inline asm: {binary_format:?}")),
+        }
+
+        generated_asm.push_str("\n\n");
+
+        generated_asm
+    }
+
+    fn prologue(generated_asm: &mut String, arch: InlineAsmArch) {
+        match arch {
+            InlineAsmArch::X86_64 => {
+                generated_asm.push_str("    push rbp\n");
+                generated_asm.push_str("    mov rbp,rsp\n");
+                generated_asm.push_str("    push rbx\n"); // rbx is callee saved
+                // rbx is reserved by LLVM for the "base pointer", so rustc doesn't allow using it
+                generated_asm.push_str("    mov rbx,rdi\n");
+            }
+            InlineAsmArch::AArch64 => {
+                generated_asm.push_str("    stp fp, lr, [sp, #-32]!\n");
+                generated_asm.push_str("    mov fp, sp\n");
+                generated_asm.push_str("    str x19, [sp, #24]\n"); // x19 is callee saved
+                // x19 is reserved by LLVM for the "base pointer", so rustc doesn't allow using it
+                generated_asm.push_str("    mov x19, x0\n");
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    addi sp, sp, -16\n");
+                generated_asm.push_str("    sd ra, 8(sp)\n");
+                generated_asm.push_str("    sd s1, 0(sp)\n"); // s1 is callee saved
+                // s1/x9 is reserved by LLVM for the "base pointer", so rustc doesn't allow using it
+                generated_asm.push_str("    mv s1, a0\n");
+            }
+            _ => unimplemented!("prologue for {:?}", arch),
+        }
+    }
+
+    fn epilogue(generated_asm: &mut String, arch: InlineAsmArch) {
+        match arch {
+            InlineAsmArch::X86_64 => {
+                generated_asm.push_str("    pop rbx\n");
+                generated_asm.push_str("    pop rbp\n");
+                generated_asm.push_str("    ret\n");
+            }
+            InlineAsmArch::AArch64 => {
+                generated_asm.push_str("    ldr x19, [sp, #24]\n");
+                generated_asm.push_str("    ldp fp, lr, [sp], #32\n");
+                generated_asm.push_str("    ret\n");
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    ld s1, 0(sp)\n");
+                generated_asm.push_str("    ld ra, 8(sp)\n");
+                generated_asm.push_str("    addi sp, sp, 16\n");
+                generated_asm.push_str("    ret\n");
+            }
+            _ => unimplemented!("epilogue for {:?}", arch),
+        }
+    }
+
+    fn epilogue_noreturn(generated_asm: &mut String, arch: InlineAsmArch) {
+        match arch {
+            InlineAsmArch::X86_64 => {
+                generated_asm.push_str("    ud2\n");
+            }
+            InlineAsmArch::AArch64 => {
+                generated_asm.push_str("    brk     #0x1\n");
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    ebreak\n");
+            }
+            _ => unimplemented!("epilogue_noreturn for {:?}", arch),
+        }
+    }
+
+    fn save_register(
+        generated_asm: &mut String,
+        arch: InlineAsmArch,
+        reg: InlineAsmReg,
+        offset: Size,
+    ) {
+        match arch {
+            InlineAsmArch::X86_64 => {
+                match reg {
+                    InlineAsmReg::X86(reg)
+                        if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+                            && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+                    {
+                        // rustc emits x0 rather than xmm0
+                        write!(generated_asm, "    movups [rbx+0x{:x}], ", offset.bytes()).unwrap();
+                        write!(generated_asm, "xmm{}", reg as u32 - X86InlineAsmReg::xmm0 as u32)
+                            .unwrap();
+                    }
+                    _ => {
+                        write!(generated_asm, "    mov [rbx+0x{:x}], ", offset.bytes()).unwrap();
+                        reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+                    }
+                }
+                generated_asm.push('\n');
+            }
+            InlineAsmArch::AArch64 => {
+                generated_asm.push_str("    str ");
+                reg.emit(generated_asm, InlineAsmArch::AArch64, None).unwrap();
+                writeln!(generated_asm, ", [x19, 0x{:x}]", offset.bytes()).unwrap();
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    sd ");
+                reg.emit(generated_asm, InlineAsmArch::RiscV64, None).unwrap();
+                writeln!(generated_asm, ", 0x{:x}(s1)", offset.bytes()).unwrap();
+            }
+            _ => unimplemented!("save_register for {:?}", arch),
+        }
+    }
+
+    fn restore_register(
+        generated_asm: &mut String,
+        arch: InlineAsmArch,
+        reg: InlineAsmReg,
+        offset: Size,
+    ) {
+        match arch {
+            InlineAsmArch::X86_64 => {
+                match reg {
+                    InlineAsmReg::X86(reg)
+                        if reg as u32 >= X86InlineAsmReg::xmm0 as u32
+                            && reg as u32 <= X86InlineAsmReg::xmm15 as u32 =>
+                    {
+                        // rustc emits x0 rather than xmm0
+                        write!(
+                            generated_asm,
+                            "    movups xmm{}",
+                            reg as u32 - X86InlineAsmReg::xmm0 as u32
+                        )
+                        .unwrap();
+                    }
+                    _ => {
+                        generated_asm.push_str("    mov ");
+                        reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap()
+                    }
+                }
+                writeln!(generated_asm, ", [rbx+0x{:x}]", offset.bytes()).unwrap();
+            }
+            InlineAsmArch::AArch64 => {
+                generated_asm.push_str("    ldr ");
+                reg.emit(generated_asm, InlineAsmArch::AArch64, None).unwrap();
+                writeln!(generated_asm, ", [x19, 0x{:x}]", offset.bytes()).unwrap();
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    ld ");
+                reg.emit(generated_asm, InlineAsmArch::RiscV64, None).unwrap();
+                writeln!(generated_asm, ", 0x{:x}(s1)", offset.bytes()).unwrap();
+            }
+            _ => unimplemented!("restore_register for {:?}", arch),
+        }
+    }
+}
+
+fn call_inline_asm<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    asm_name: &str,
+    slot_size: Size,
+    inputs: Vec<(Size, Value)>,
+    outputs: Vec<(Size, CPlace<'tcx>)>,
+) {
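+    // The generated asm wrapper takes a single pointer (using the SystemV call
+    // conv) to a 16-byte aligned stack slot through which all inputs and outputs
+    // are passed.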
+    let stack_slot = fx.create_stack_slot(u32::try_from(slot_size.bytes()).unwrap(), 16);
+
+    let inline_asm_func = fx
+        .module
+        .declare_function(
+            asm_name,
+            Linkage::Import,
+            &Signature {
+                call_conv: CallConv::SystemV,
+                params: vec![AbiParam::new(fx.pointer_type)],
+                returns: vec![],
+            },
+        )
+        .unwrap();
+    let inline_asm_func = fx.module.declare_func_in_func(inline_asm_func, fx.bcx.func);
+    if fx.clif_comments.enabled() {
+        fx.add_comment(inline_asm_func, asm_name);
+    }
+
+    for (offset, value) in inputs {
+        stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).store(
+            fx,
+            value,
+            MemFlags::trusted(),
+        );
+    }
+
+    let stack_slot_addr = stack_slot.get_addr(fx);
+    fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
+
+    for (offset, place) in outputs {
+        let ty = if place.layout().ty.is_simd() {
+            let (lane_count, lane_type) = place.layout().ty.simd_size_and_type(fx.tcx);
+            asm_clif_type(fx, lane_type).unwrap().by(lane_count.try_into().unwrap()).unwrap()
+        } else {
+            asm_clif_type(fx, place.layout().ty).unwrap()
+        };
+        let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
+            fx,
+            ty,
+            MemFlags::trusted(),
+        );
+        place.write_cvalue(fx, CValue::by_val(value, place.layout()));
+    }
+}
+
+fn asm_clif_type<'tcx>(fx: &FunctionCx<'_, '_, 'tcx>, ty: Ty<'tcx>) -> Option<types::Type> {
+    match ty.kind() {
+        // Adapted from https://github.com/rust-lang/rust/blob/f3c66088610c1b80110297c2d9a8b5f9265b013f/compiler/rustc_hir_analysis/src/check/intrinsicck.rs#L136-L151
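+        // e.g. `MaybeUninit<u32>` is treated like `u32` and maps to `types::I32`.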
+        ty::Adt(adt, args) if Some(adt.did()) == fx.tcx.lang_items().maybe_uninit() => {
+            let fields = &adt.non_enum_variant().fields;
+            // Field 1 is `MaybeUninit::value`; field 0 is the `uninit: ()` variant.
+            let ty = fields[FieldIdx::from_u32(1)].ty(fx.tcx, args);
+            let ty::Adt(ty, args) = ty.kind() else {
+                unreachable!("expected `MaybeUninit::value` to be an ADT")
+            };
+            assert!(
+                ty.is_manually_drop(),
+                "expected `MaybeUninit::value` to be `ManuallyDrop`"
+            );
+            let fields = &ty.non_enum_variant().fields;
+            let ty = fields[FieldIdx::ZERO].ty(fx.tcx, args);
+            fx.clif_type(ty)
+        }
+        _ => fx.clif_type(ty),
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs
new file mode 100644
index 00000000000..e50c74b87f6
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs
@@ -0,0 +1,85 @@
+//! Emulate LLVM intrinsics
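+//!
+//! Target-specific intrinsics are dispatched to the `llvm_aarch64` and `llvm_x86`
+//! modules; unsupported intrinsics emit a warning and are lowered to a trap.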
+
+use crate::intrinsics::*;
+use crate::prelude::*;
+
+pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    intrinsic: &str,
+    generic_args: GenericArgsRef<'tcx>,
+    args: &[Spanned<mir::Operand<'tcx>>],
+    ret: CPlace<'tcx>,
+    target: Option<BasicBlock>,
+    span: Span,
+) {
+    if intrinsic.starts_with("llvm.aarch64") {
+        return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
+            fx,
+            intrinsic,
+            generic_args,
+            args,
+            ret,
+            target,
+        );
+    }
+    if intrinsic.starts_with("llvm.x86") {
+        return llvm_x86::codegen_x86_llvm_intrinsic_call(
+            fx,
+            intrinsic,
+            generic_args,
+            args,
+            ret,
+            target,
+            span,
+        );
+    }
+
+    match intrinsic {
+        "llvm.prefetch" => {
+            // Nothing to do. This is merely a perf hint.
+        }
+
+        _ if intrinsic.starts_with("llvm.ctlz.v") => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().clz(lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.ctpop.v") => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().popcnt(lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.fma.v") => {
+            intrinsic_args!(fx, args => (x,y,z); intrinsic);
+
+            simd_trio_for_each_lane(
+                fx,
+                x,
+                y,
+                z,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, lane_x, lane_y, lane_z| {
+                    fx.bcx.ins().fma(lane_x, lane_y, lane_z)
+                },
+            );
+        }
+
+        _ => {
+            fx.tcx
+                .dcx()
+                .warn(format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic));
+            crate::trap::trap_unimplemented(fx, intrinsic);
+            return;
+        }
+    }
+
+    let dest = target.expect("all llvm intrinsics used by stdlib should return");
+    let ret_block = fx.get_block(dest);
+    fx.bcx.ins().jump(ret_block, &[]);
+}
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
new file mode 100644
index 00000000000..e66bcbf4e40
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs
@@ -0,0 +1,338 @@
+//! Emulate AArch64 LLVM intrinsics
+
+use crate::intrinsics::*;
+use crate::prelude::*;
+
+pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    intrinsic: &str,
+    _args: GenericArgsRef<'tcx>,
+    args: &[Spanned<mir::Operand<'tcx>>],
+    ret: CPlace<'tcx>,
+    target: Option<BasicBlock>,
+) {
+    // FIXME: llvm.aarch64.neon.sqshl.v*i* is still unimplemented (see the commented-out code below)
+
+    match intrinsic {
+        "llvm.aarch64.isb" => {
+            fx.bcx.ins().fence();
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.abs.v") => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().iabs(lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.cls.v") => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().cls(lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.rbit.v") => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().bitrev(lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v")
+            || intrinsic.starts_with("llvm.aarch64.neon.uqadd.v") =>
+        {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
+                crate::num::codegen_saturating_int_binop(fx, BinOp::Add, x_lane, y_lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v")
+            || intrinsic.starts_with("llvm.aarch64.neon.uqsub.v") =>
+        {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
+                crate::num::codegen_saturating_int_binop(fx, BinOp::Sub, x_lane, y_lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.smax.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
+                    let gt = fx.bcx.ins().icmp(IntCC::SignedGreaterThan, x_lane, y_lane);
+                    fx.bcx.ins().select(gt, x_lane, y_lane)
+                },
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.umax.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
+                    let gt = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, x_lane, y_lane);
+                    fx.bcx.ins().select(gt, x_lane, y_lane)
+                },
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
+                let gt = fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b);
+                fx.bcx.ins().select(gt, a, b)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.umaxv.i") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
+                let gt = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b);
+                fx.bcx.ins().select(gt, a, b)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.smin.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
+                    let gt = fx.bcx.ins().icmp(IntCC::SignedLessThan, x_lane, y_lane);
+                    fx.bcx.ins().select(gt, x_lane, y_lane)
+                },
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.umin.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| {
+                    let gt = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, x_lane, y_lane);
+                    fx.bcx.ins().select(gt, x_lane, y_lane)
+                },
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sminv.i") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
+                let gt = fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b);
+                fx.bcx.ins().select(gt, a, b)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.uminv.i") => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| {
+                let gt = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b);
+                fx.bcx.ins().select(gt, a, b)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.umaxp.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_horizontal_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().umax(x_lane, y_lane),
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.smaxp.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_horizontal_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().smax(x_lane, y_lane),
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.uminp.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_horizontal_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().umin(x_lane, y_lane),
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sminp.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_horizontal_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().smin(x_lane, y_lane),
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.fminp.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_horizontal_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmin(x_lane, y_lane),
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.fmaxp.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_horizontal_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmax(x_lane, y_lane),
+            );
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.addp.v") => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            simd_horizontal_pair_for_each_lane(
+                fx,
+                x,
+                y,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().iadd(x_lane, y_lane),
+            );
+        }
+
+        // FIXME generalize vector types
+        "llvm.aarch64.neon.tbl1.v8i8" => {
+            intrinsic_args!(fx, args => (t, idx); intrinsic);
+
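+            // TBL1 looks up each result byte in the 16-byte table `t`;
+            // out-of-range indices (>= 16) produce zero.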
+            let zero = fx.bcx.ins().iconst(types::I8, 0);
+            for i in 0..8 {
+                let idx_lane = idx.value_lane(fx, i).load_scalar(fx);
+                let is_zero =
+                    fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThanOrEqual, idx_lane, 16);
+                let t_idx = fx.bcx.ins().uextend(fx.pointer_type, idx_lane);
+                let t_lane = t.value_lane_dyn(fx, t_idx).load_scalar(fx);
+                let res = fx.bcx.ins().select(is_zero, zero, t_lane);
+                ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+            }
+        }
+        "llvm.aarch64.neon.tbl1.v16i8" => {
+            intrinsic_args!(fx, args => (t, idx); intrinsic);
+
+            let zero = fx.bcx.ins().iconst(types::I8, 0);
+            for i in 0..16 {
+                let idx_lane = idx.value_lane(fx, i).load_scalar(fx);
+                let is_zero =
+                    fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThanOrEqual, idx_lane, 16);
+                let t_idx = fx.bcx.ins().uextend(fx.pointer_type, idx_lane);
+                let t_lane = t.value_lane_dyn(fx, t_idx).load_scalar(fx);
+                let res = fx.bcx.ins().select(is_zero, zero, t_lane);
+                ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+            }
+        }
+
+        /*
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sshl.v")
+            || intrinsic.starts_with("llvm.aarch64.neon.sqshl.v")
+            // FIXME split this one out once saturating is implemented
+            || intrinsic.starts_with("llvm.aarch64.neon.sqshlu.v") =>
+        {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            simd_pair_for_each_lane(fx, a, b, ret, &|fx, _lane_ty, _res_lane_ty, a, b| {
+                // FIXME saturate?
+                fx.bcx.ins().ishl(a, b)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sqshrn.v") => {
+            let (a, imm32) = match args {
+                [a, imm32] => (a, imm32),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let imm32 = crate::constant::mir_operand_get_const_val(fx, imm32)
+                .expect("llvm.aarch64.neon.sqshrn.v* imm32 not const");
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm32
+                .try_to_bits(Size::from_bytes(4))
+                .unwrap_or_else(|| panic!("imm32 not scalar: {:?}", imm32))
+            {
+                imm32 if imm32 < 32 => fx.bcx.ins().sshr_imm(lane, i64::from(imm32 as u8)),
+                _ => fx.bcx.ins().iconst(types::I32, 0),
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.aarch64.neon.sqshrun.v") => {
+            let (a, imm32) = match args {
+                [a, imm32] => (a, imm32),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, a);
+            let imm32 = crate::constant::mir_operand_get_const_val(fx, imm32)
+                .expect("llvm.aarch64.neon.sqshrn.v* imm32 not const");
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm32
+                .try_to_bits(Size::from_bytes(4))
+                .unwrap_or_else(|| panic!("imm32 not scalar: {:?}", imm32))
+            {
+                imm32 if imm32 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm32 as u8)),
+                _ => fx.bcx.ins().iconst(types::I32, 0),
+            });
+        }
+        */
+        _ => {
+            fx.tcx.dcx().warn(format!(
+                "unsupported AArch64 llvm intrinsic {}; replacing with trap",
+                intrinsic
+            ));
+            crate::trap::trap_unimplemented(fx, intrinsic);
+            return;
+        }
+    }
+
+    let dest = target.expect("all llvm intrinsics used by stdlib should return");
+    let ret_block = fx.get_block(dest);
+    fx.bcx.ins().jump(ret_block, &[]);
+}
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
new file mode 100644
index 00000000000..27b55ecc72e
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -0,0 +1,1574 @@
+//! Emulate x86 LLVM intrinsics
+
+use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
+use rustc_target::asm::*;
+
+use crate::inline_asm::{codegen_inline_asm_inner, CInlineAsmOperand};
+use crate::intrinsics::*;
+use crate::prelude::*;
+
+pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    intrinsic: &str,
+    _args: GenericArgsRef<'tcx>,
+    args: &[Spanned<mir::Operand<'tcx>>],
+    ret: CPlace<'tcx>,
+    target: Option<BasicBlock>,
+    span: Span,
+) {
+    match intrinsic {
+        "llvm.x86.sse2.pause" | "llvm.aarch64.isb" => {
+            // Spin loop hint
+        }
+
+        "llvm.x86.avx.vzeroupper" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper&ig_expand=7218
+            // Do nothing. It is a perf hint anyway.
+        }
+
+        // Used by is_x86_feature_detected!();
+        "llvm.x86.xgetbv" => {
+            intrinsic_args!(fx, args => (xcr_no); intrinsic);
+
+            let xcr_no = xcr_no.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(
+                    "
+                    xgetbv
+                    // out = rdx << 32 | rax
+                    shl rdx, 32
+                    or rax, rdx
+                    "
+                    .to_string(),
+                )],
+                &[
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
+                        value: xcr_no,
+                    },
+                    CInlineAsmOperand::Out {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+                        late: true,
+                        place: Some(ret),
+                    },
+                    CInlineAsmOperand::Out {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+                        late: true,
+                        place: None,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sse3.ldu.dq" | "llvm.x86.avx.ldu.dq.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lddqu_si128&ig_expand=4009
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256&ig_expand=4010
+            intrinsic_args!(fx, args => (ptr); intrinsic);
+
+            // FIXME correctly handle unalignedness
+            let val = CValue::by_ref(Pointer::new(ptr.load_scalar(fx)), ret.layout());
+            ret.write_cvalue(fx, val);
+        }
+
+        "llvm.x86.avx2.gather.d.d"
+        | "llvm.x86.avx2.gather.d.q"
+        | "llvm.x86.avx2.gather.d.ps"
+        | "llvm.x86.avx2.gather.d.pd"
+        | "llvm.x86.avx2.gather.d.d.256"
+        | "llvm.x86.avx2.gather.d.q.256"
+        | "llvm.x86.avx2.gather.d.ps.256"
+        | "llvm.x86.avx2.gather.d.pd.256"
+        | "llvm.x86.avx2.gather.q.d"
+        | "llvm.x86.avx2.gather.q.q"
+        | "llvm.x86.avx2.gather.q.ps"
+        | "llvm.x86.avx2.gather.q.pd"
+        | "llvm.x86.avx2.gather.q.d.256"
+        | "llvm.x86.avx2.gather.q.q.256"
+        | "llvm.x86.avx2.gather.q.ps.256"
+        | "llvm.x86.avx2.gather.q.pd.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd&ig_expand=3819
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd&ig_expand=3821
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd&ig_expand=3822
+            // ...
+
+            intrinsic_args!(fx, args => (src, ptr, index, mask, scale); intrinsic);
+
+            let (src_lane_count, src_lane_ty) = src.layout().ty.simd_size_and_type(fx.tcx);
+            let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(src_lane_ty, ret_lane_ty);
+            assert!(index_lane_ty.is_integral());
+            assert_eq!(src_lane_count, mask_lane_count);
+            assert_eq!(src_lane_count, ret_lane_count);
+
+            let lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap();
+            let index_lane_clif_ty = fx.clif_type(index_lane_ty).unwrap();
+            let mask_lane_clif_ty = fx.clif_type(mask_lane_ty).unwrap();
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+            let ptr = ptr.load_scalar(fx);
+            let scale = scale.load_scalar(fx);
+            let scale = fx.bcx.ins().uextend(types::I64, scale);
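+            // Emulate the gather lane by lane: the MSB of each mask lane
+            // chooses between loading from `ptr + index * scale` and passing
+            // the corresponding `src` lane through unchanged.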
+            for lane_idx in 0..std::cmp::min(src_lane_count, index_lane_count) {
+                let src_lane = src.value_lane(fx, lane_idx).load_scalar(fx);
+                let index_lane = index.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane =
+                    fx.bcx.ins().bitcast(mask_lane_clif_ty.as_int(), MemFlags::new(), mask_lane);
+
+                let if_enabled = fx.bcx.create_block();
+                let if_disabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+                let mask_lane = match mask_lane_clif_ty {
+                    types::I32 | types::F32 => {
+                        fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64)
+                    }
+                    types::I64 | types::F64 => {
+                        fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64)
+                    }
+                    _ => unreachable!(),
+                };
+                fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
+                fx.bcx.seal_block(if_enabled);
+                fx.bcx.seal_block(if_disabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                let index_lane = if index_lane_clif_ty != types::I64 {
+                    fx.bcx.ins().sextend(types::I64, index_lane)
+                } else {
+                    index_lane
+                };
+                let offset = fx.bcx.ins().imul(index_lane, scale);
+                let lane_ptr = fx.bcx.ins().iadd(ptr, offset);
+                let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), lane_ptr, 0);
+                fx.bcx.ins().jump(next, &[res]);
+
+                fx.bcx.switch_to_block(if_disabled);
+                fx.bcx.ins().jump(next, &[src_lane]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+
+                fx.bcx.ins().nop();
+
+                ret.place_lane(fx, lane_idx)
+                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+            }
+
+            for lane_idx in std::cmp::min(src_lane_count, index_lane_count)..ret_lane_count {
+                let zero_lane = fx.bcx.ins().iconst(mask_lane_clif_ty.as_int(), 0);
+                let zero_lane = fx.bcx.ins().bitcast(mask_lane_clif_ty, MemFlags::new(), zero_lane);
+                ret.place_lane(fx, lane_idx)
+                    .write_cvalue(fx, CValue::by_val(zero_lane, ret_lane_layout));
+            }
+        }
+
+        "llvm.x86.sse.add.ss" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), ret.layout());
+            let layout = a.layout();
+
+            let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            let ret_lane_layout = fx.layout_of(lane_ty);
+
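+            // Scalar operation: lane 0 becomes a[0] + b[0] while all other
+            // lanes are copied from `a` unchanged.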
+            ret.write_cvalue(fx, a);
+
+            let a_lane = a.value_lane(fx, 0).load_scalar(fx);
+            let b_lane = b.value_lane(fx, 0).load_scalar(fx);
+
+            let res = fx.bcx.ins().fadd(a_lane, b_lane);
+
+            let res_lane = CValue::by_val(res, ret_lane_layout);
+            ret.place_lane(fx, 0).write_cvalue(fx, res_lane);
+        }
+
+        "llvm.x86.sse.sqrt.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            // FIXME use vector instructions when possible
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().sqrt(lane)
+            });
+        }
+
+        "llvm.x86.sse.max.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmax(a_lane, b_lane),
+            );
+        }
+
+        "llvm.x86.sse.min.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ps&ig_expand=4489
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmin(a_lane, b_lane),
+            );
+        }
+
+        "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
+            let (x, y, kind) = match args {
+                [x, y, kind] => (x, y, kind),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let x = codegen_operand(fx, &x.node);
+            let y = codegen_operand(fx, &y.node);
+            let kind = match &kind.node {
+                Operand::Constant(const_) => crate::constant::eval_mir_constant(fx, const_).0,
+                Operand::Copy(_) | Operand::Move(_) => unreachable!("{kind:?}"),
+            };
+
+            let flt_cc = match kind
+                .try_to_bits(Size::from_bytes(1))
+                .unwrap_or_else(|| panic!("kind not scalar: {:?}", kind))
+                .try_into()
+                .unwrap()
+            {
+                _CMP_EQ_OQ | _CMP_EQ_OS => FloatCC::Equal,
+                _CMP_LT_OS | _CMP_LT_OQ => FloatCC::LessThan,
+                _CMP_LE_OS | _CMP_LE_OQ => FloatCC::LessThanOrEqual,
+                _CMP_UNORD_Q | _CMP_UNORD_S => FloatCC::Unordered,
+                _CMP_NEQ_UQ | _CMP_NEQ_US => FloatCC::NotEqual,
+                _CMP_NLT_US | _CMP_NLT_UQ => FloatCC::UnorderedOrGreaterThanOrEqual,
+                _CMP_NLE_US | _CMP_NLE_UQ => FloatCC::UnorderedOrGreaterThan,
+                _CMP_ORD_Q | _CMP_ORD_S => FloatCC::Ordered,
+                _CMP_EQ_UQ | _CMP_EQ_US => FloatCC::UnorderedOrEqual,
+                _CMP_NGE_US | _CMP_NGE_UQ => FloatCC::UnorderedOrLessThan,
+                _CMP_NGT_US | _CMP_NGT_UQ => FloatCC::UnorderedOrLessThanOrEqual,
+                _CMP_FALSE_OQ | _CMP_FALSE_OS => todo!(),
+                _CMP_NEQ_OQ | _CMP_NEQ_OS => FloatCC::OrderedNotEqual,
+                _CMP_GE_OS | _CMP_GE_OQ => FloatCC::GreaterThanOrEqual,
+                _CMP_GT_OS | _CMP_GT_OQ => FloatCC::GreaterThan,
+                _CMP_TRUE_UQ | _CMP_TRUE_US => todo!(),
+
+                kind => unreachable!("kind {:?}", kind),
+            };
+
+            // Copied from stdarch
+            /// Equal (ordered, non-signaling)
+            const _CMP_EQ_OQ: i32 = 0x00;
+            /// Less-than (ordered, signaling)
+            const _CMP_LT_OS: i32 = 0x01;
+            /// Less-than-or-equal (ordered, signaling)
+            const _CMP_LE_OS: i32 = 0x02;
+            /// Unordered (non-signaling)
+            const _CMP_UNORD_Q: i32 = 0x03;
+            /// Not-equal (unordered, non-signaling)
+            const _CMP_NEQ_UQ: i32 = 0x04;
+            /// Not-less-than (unordered, signaling)
+            const _CMP_NLT_US: i32 = 0x05;
+            /// Not-less-than-or-equal (unordered, signaling)
+            const _CMP_NLE_US: i32 = 0x06;
+            /// Ordered (non-signaling)
+            const _CMP_ORD_Q: i32 = 0x07;
+            /// Equal (unordered, non-signaling)
+            const _CMP_EQ_UQ: i32 = 0x08;
+            /// Not-greater-than-or-equal (unordered, signaling)
+            const _CMP_NGE_US: i32 = 0x09;
+            /// Not-greater-than (unordered, signaling)
+            const _CMP_NGT_US: i32 = 0x0a;
+            /// False (ordered, non-signaling)
+            const _CMP_FALSE_OQ: i32 = 0x0b;
+            /// Not-equal (ordered, non-signaling)
+            const _CMP_NEQ_OQ: i32 = 0x0c;
+            /// Greater-than-or-equal (ordered, signaling)
+            const _CMP_GE_OS: i32 = 0x0d;
+            /// Greater-than (ordered, signaling)
+            const _CMP_GT_OS: i32 = 0x0e;
+            /// True (unordered, non-signaling)
+            const _CMP_TRUE_UQ: i32 = 0x0f;
+            /// Equal (ordered, signaling)
+            const _CMP_EQ_OS: i32 = 0x10;
+            /// Less-than (ordered, non-signaling)
+            const _CMP_LT_OQ: i32 = 0x11;
+            /// Less-than-or-equal (ordered, non-signaling)
+            const _CMP_LE_OQ: i32 = 0x12;
+            /// Unordered (signaling)
+            const _CMP_UNORD_S: i32 = 0x13;
+            /// Not-equal (unordered, signaling)
+            const _CMP_NEQ_US: i32 = 0x14;
+            /// Not-less-than (unordered, non-signaling)
+            const _CMP_NLT_UQ: i32 = 0x15;
+            /// Not-less-than-or-equal (unordered, non-signaling)
+            const _CMP_NLE_UQ: i32 = 0x16;
+            /// Ordered (signaling)
+            const _CMP_ORD_S: i32 = 0x17;
+            /// Equal (unordered, signaling)
+            const _CMP_EQ_US: i32 = 0x18;
+            /// Not-greater-than-or-equal (unordered, non-signaling)
+            const _CMP_NGE_UQ: i32 = 0x19;
+            /// Not-greater-than (unordered, non-signaling)
+            const _CMP_NGT_UQ: i32 = 0x1a;
+            /// False (ordered, signaling)
+            const _CMP_FALSE_OS: i32 = 0x1b;
+            /// Not-equal (ordered, signaling)
+            const _CMP_NEQ_OS: i32 = 0x1c;
+            /// Greater-than-or-equal (ordered, non-signaling)
+            const _CMP_GE_OQ: i32 = 0x1d;
+            /// Greater-than (ordered, non-signaling)
+            const _CMP_GT_OQ: i32 = 0x1e;
+            /// True (unordered, signaling)
+            const _CMP_TRUE_US: i32 = 0x1f;
+
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
+                let res_lane = match lane_ty.kind() {
+                    ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
+                    _ => unreachable!("{:?}", lane_ty),
+                };
+                bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane)
+            });
+        }
+        "llvm.x86.ssse3.pshuf.b.128" | "llvm.x86.avx2.pshuf.b" => {
+            let (a, b) = match args {
+                [a, b] => (a, b),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, &a.node);
+            let b = codegen_operand(fx, &b.node);
+
+            // Based on the pseudocode at https://github.com/rust-lang/stdarch/blob/1cfbca8b38fd9b4282b2f054f61c6ca69fc7ce29/crates/core_arch/src/x86/avx2.rs#L2319-L2332
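+            // Bit 7 of each control byte in `b` zeroes the result lane; the
+            // low four bits select a byte from the same 128-bit half of `a`.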
+            let zero = fx.bcx.ins().iconst(types::I8, 0);
+            for i in 0..16 {
+                let b_lane = b.value_lane(fx, i).load_scalar(fx);
+                let is_zero = fx.bcx.ins().band_imm(b_lane, 0x80);
+                let a_idx = fx.bcx.ins().band_imm(b_lane, 0xf);
+                let a_idx = fx.bcx.ins().uextend(fx.pointer_type, a_idx);
+                let a_lane = a.value_lane_dyn(fx, a_idx).load_scalar(fx);
+                let res = fx.bcx.ins().select(is_zero, zero, a_lane);
+                ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+            }
+
+            if intrinsic == "llvm.x86.avx2.pshuf.b" {
+                for i in 16..32 {
+                    let b_lane = b.value_lane(fx, i).load_scalar(fx);
+                    let is_zero = fx.bcx.ins().band_imm(b_lane, 0x80);
+                    let b_lane_masked = fx.bcx.ins().band_imm(b_lane, 0xf);
+                    let a_idx = fx.bcx.ins().iadd_imm(b_lane_masked, 16);
+                    let a_idx = fx.bcx.ins().uextend(fx.pointer_type, a_idx);
+                    let a_lane = a.value_lane_dyn(fx, a_idx).load_scalar(fx);
+                    let res = fx.bcx.ins().select(is_zero, zero, a_lane);
+                    ret.place_lane(fx, i).to_ptr().store(fx, res, MemFlags::trusted());
+                }
+            }
+        }
+        "llvm.x86.avx2.permd" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32
+            intrinsic_args!(fx, args => (a, idx); intrinsic);
+
+            for j in 0..=7 {
+                let index = idx.value_typed_lane(fx, fx.tcx.types.u32, j).load_scalar(fx);
+                let index = fx.bcx.ins().uextend(fx.pointer_type, index);
+                let value = a.value_lane_dyn(fx, index).load_scalar(fx);
+                ret.place_typed_lane(fx, fx.tcx.types.u32, j).to_ptr().store(
+                    fx,
+                    value,
+                    MemFlags::trusted(),
+                );
+            }
+        }
+        "llvm.x86.avx2.vperm2i128"
+        | "llvm.x86.avx.vperm2f128.ps.256"
+        | "llvm.x86.avx.vperm2f128.pd.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd
+            let (a, b, imm8) = match args {
+                [a, b, imm8] => (a, b, imm8),
+                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
+            };
+            let a = codegen_operand(fx, &a.node);
+            let b = codegen_operand(fx, &b.node);
+            let imm8 = codegen_operand(fx, &imm8.node).load_scalar(fx);
+
+            let a_low = a.value_typed_lane(fx, fx.tcx.types.u128, 0).load_scalar(fx);
+            let a_high = a.value_typed_lane(fx, fx.tcx.types.u128, 1).load_scalar(fx);
+
+            let b_low = b.value_typed_lane(fx, fx.tcx.types.u128, 0).load_scalar(fx);
+            let b_high = b.value_typed_lane(fx, fx.tcx.types.u128, 1).load_scalar(fx);
+
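+            // Pick one 128-bit half of the result from the four input halves:
+            // bit 0 of `control` selects the high vs low half of the chosen
+            // source, bit 1 selects `b` vs `a`, and bit 3 zeroes the result.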
+            fn select4(
+                fx: &mut FunctionCx<'_, '_, '_>,
+                a_high: Value,
+                a_low: Value,
+                b_high: Value,
+                b_low: Value,
+                control: Value,
+            ) -> Value {
+                let a_or_b = fx.bcx.ins().band_imm(control, 0b0010);
+                let high_or_low = fx.bcx.ins().band_imm(control, 0b0001);
+                let is_zero = fx.bcx.ins().band_imm(control, 0b1000);
+
+                let zero = fx.bcx.ins().iconst(types::I64, 0);
+                let zero = fx.bcx.ins().iconcat(zero, zero);
+
+                let res_a = fx.bcx.ins().select(high_or_low, a_high, a_low);
+                let res_b = fx.bcx.ins().select(high_or_low, b_high, b_low);
+                let res = fx.bcx.ins().select(a_or_b, res_b, res_a);
+                fx.bcx.ins().select(is_zero, zero, res)
+            }
+
+            let control0 = imm8;
+            let res_low = select4(fx, a_high, a_low, b_high, b_low, control0);
+
+            let control1 = fx.bcx.ins().ushr_imm(imm8, 4);
+            let res_high = select4(fx, a_high, a_low, b_high, b_low, control1);
+
+            ret.place_typed_lane(fx, fx.tcx.types.u128, 0).to_ptr().store(
+                fx,
+                res_low,
+                MemFlags::trusted(),
+            );
+            ret.place_typed_lane(fx, fx.tcx.types.u128, 1).to_ptr().store(
+                fx,
+                res_high,
+                MemFlags::trusted(),
+            );
+        }
+        "llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().iabs(lane)
+            });
+        }
+        "llvm.x86.sse2.cvttps2dq" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32&ig_expand=2429
+            intrinsic_args!(fx, args => (a); intrinsic);
+            let a = a.load_scalar(fx);
+
+            // Use inline asm instead of fcvt_to_sint_sat: cvttps2dq turns unrepresentable
+            // values into 0x80000000, a behavior for which Cranelift has no native instruction.
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("cvttps2dq xmm0, xmm0"))],
+                &[CInlineAsmOperand::InOut {
+                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                    _late: true,
+                    in_value: a,
+                    out_place: Some(ret),
+                }],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+        "llvm.x86.addcarry.32" | "llvm.x86.addcarry.64" => {
+            intrinsic_args!(fx, args => (c_in, a, b); intrinsic);
+            let c_in = c_in.load_scalar(fx);
+
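+            // `llvm_add_sub` computes a + b + c_in, returning the carry-out
+            // byte together with the sum.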
+            let (cb_out, c) = llvm_add_sub(fx, BinOp::Add, c_in, a, b);
+
+            let layout = fx.layout_of(Ty::new_tup(fx.tcx, &[fx.tcx.types.u8, a.layout().ty]));
+            let val = CValue::by_val_pair(cb_out, c, layout);
+            ret.write_cvalue(fx, val);
+        }
+        "llvm.x86.addcarryx.u32" | "llvm.x86.addcarryx.u64" => {
+            intrinsic_args!(fx, args => (c_in, a, b, out); intrinsic);
+            let c_in = c_in.load_scalar(fx);
+
+            let (cb_out, c) = llvm_add_sub(fx, BinOp::Add, c_in, a, b);
+
+            Pointer::new(out.load_scalar(fx)).store(fx, c, MemFlags::trusted());
+            ret.write_cvalue(fx, CValue::by_val(cb_out, fx.layout_of(fx.tcx.types.u8)));
+        }
+        "llvm.x86.subborrow.32" | "llvm.x86.subborrow.64" => {
+            intrinsic_args!(fx, args => (b_in, a, b); intrinsic);
+            let b_in = b_in.load_scalar(fx);
+
+            let (cb_out, c) = llvm_add_sub(fx, BinOp::Sub, b_in, a, b);
+
+            let layout = fx.layout_of(Ty::new_tup(fx.tcx, &[fx.tcx.types.u8, a.layout().ty]));
+            let val = CValue::by_val_pair(cb_out, c, layout);
+            ret.write_cvalue(fx, val);
+        }
+        "llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            // FIXME use vector instructions when possible
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
+                    // (a + b + 1) >> 1
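+                    // Widen to the double-width type first so the rounding
+                    // add cannot overflow.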
+                    let lane_ty = fx.bcx.func.dfg.value_type(a_lane);
+                    let a_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), a_lane);
+                    let b_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), b_lane);
+                    let sum = fx.bcx.ins().iadd(a_lane, b_lane);
+                    let num_plus_one = fx.bcx.ins().iadd_imm(sum, 1);
+                    let res = fx.bcx.ins().ushr_imm(num_plus_one, 1);
+                    fx.bcx.ins().ireduce(lane_ty, res)
+                },
+            );
+        }
+        "llvm.x86.sse2.psra.w" => {
+            intrinsic_args!(fx, args => (a, count); intrinsic);
+
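+            // psraw shifts every lane by the shift count in the low 64 bits
+            // of `count`; counts of the lane width or more fill the lane with
+            // the sign bit, which clamping to lane_bits - 1 reproduces.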
+            let count_lane = count.force_stack(fx).0.load(fx, types::I64, MemFlags::trusted());
+            let lane_ty = fx.clif_type(a.layout().ty.simd_size_and_type(fx.tcx).1).unwrap();
+            let max_count = fx.bcx.ins().iconst(types::I64, i64::from(lane_ty.bits() - 1));
+            let saturated_count = fx.bcx.ins().umin(count_lane, max_count);
+
+            // FIXME use vector instructions when possible
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, a_lane| {
+                fx.bcx.ins().sshr(a_lane, saturated_count)
+            });
+        }
+        "llvm.x86.sse2.psad.bw" | "llvm.x86.avx2.psad.bw" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8&ig_expand=5770
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8&ig_expand=5771
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.u8);
+            assert_eq!(ret_lane_ty, fx.tcx.types.u64);
+            assert_eq!(lane_count, ret_lane_count * 8);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.u64);
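+            // Each u64 output lane is the sum of absolute differences of the
+            // corresponding eight u8 input lanes.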
+            for out_lane_idx in 0..lane_count / 8 {
+                let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);
+
+                for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 {
+                    let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
+                    let a_lane = fx.bcx.ins().uextend(types::I16, a_lane);
+                    let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
+                    let b_lane = fx.bcx.ins().uextend(types::I16, b_lane);
+
+                    let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
+                    let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
+                    let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff);
+                    lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff);
+                }
+
+                let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
+        "llvm.x86.ssse3.pmadd.ub.sw.128" | "llvm.x86.avx2.pmadd.ub.sw" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16&ig_expand=4267
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16&ig_expand=4270
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.u8);
+            assert_eq!(ret_lane_ty, fx.tcx.types.i16);
+            assert_eq!(lane_count, ret_lane_count * 2);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
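+            // Multiply unsigned lanes of `a` by signed lanes of `b`, then add
+            // adjacent i16 products with signed saturation.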
+            for out_lane_idx in 0..lane_count / 2 {
+                let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0);
+                let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0);
+
+                let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1);
+                let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1);
+
+                let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
+                let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
+
+                let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1);
+
+                let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0);
+
+                let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16));
+                let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16));
+
+                let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
+                let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val);
+
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
+        "llvm.x86.sse2.pmadd.wd" | "llvm.x86.avx2.pmadd.wd" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16&ig_expand=4231
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16&ig_expand=4234
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.i16);
+            assert_eq!(ret_lane_ty, fx.tcx.types.i32);
+            assert_eq!(lane_count, ret_lane_count * 2);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
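+            // Multiply adjacent pairs of signed 16-bit lanes and add the two
+            // 32-bit products.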
+            for out_lane_idx in 0..lane_count / 2 {
+                let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0);
+                let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);
+
+                let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1);
+                let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);
+
+                let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
+                let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
+
+                let res_lane = fx.bcx.ins().iadd(mul0, mul1);
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        "llvm.x86.ssse3.pmul.hr.sw.128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16&ig_expand=4782
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.i16);
+            assert_eq!(ret_lane_ty, fx.tcx.types.i16);
+            assert_eq!(lane_count, ret_lane_count);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
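+            // Rounding high multiply: (((a * b) >> 14) + 1) >> 1, i.e.
+            // round((a * b) / 2^15) for each lane.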
+            for out_lane_idx in 0..lane_count {
+                let a_lane = a.value_lane(fx, out_lane_idx).load_scalar(fx);
+                let a_lane = fx.bcx.ins().sextend(types::I32, a_lane);
+                let b_lane = b.value_lane(fx, out_lane_idx).load_scalar(fx);
+                let b_lane = fx.bcx.ins().sextend(types::I32, b_lane);
+
+                let mul: Value = fx.bcx.ins().imul(a_lane, b_lane);
+                let shifted = fx.bcx.ins().ushr_imm(mul, 14);
+                let incremented = fx.bcx.ins().iadd_imm(shifted, 1);
+                let shifted_again = fx.bcx.ins().ushr_imm(incremented, 1);
+
+                let res_lane = fx.bcx.ins().ireduce(types::I16, shifted_again);
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        "llvm.x86.sse2.packuswb.128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16&ig_expand=4903
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::U8, PackWidth::Sse);
+        }
+
+        "llvm.x86.sse2.packsswb.128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16&ig_expand=4848
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::S8, PackWidth::Sse);
+        }
+
+        "llvm.x86.avx2.packuswb" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16&ig_expand=4906
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::U8, PackWidth::Avx);
+        }
+
+        "llvm.x86.avx2.packsswb" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16&ig_expand=4851
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::S8, PackWidth::Avx);
+        }
+
+        "llvm.x86.sse41.packusdw" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32&ig_expand=4912
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::U16, PackWidth::Sse);
+        }
+
+        "llvm.x86.sse2.packssdw.128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32&ig_expand=4889
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Sse);
+        }
+
+        "llvm.x86.avx2.packusdw" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32&ig_expand=4883
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::U16, PackWidth::Avx);
+        }
+
+        "llvm.x86.avx2.packssdw" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32&ig_expand=4892
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Avx);
+        }
+
+        "llvm.x86.fma.vfmaddsub.ps"
+        | "llvm.x86.fma.vfmaddsub.pd"
+        | "llvm.x86.fma.vfmaddsub.ps.256"
+        | "llvm.x86.fma.vfmaddsub.pd.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), c.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            assert!(ret_lane_ty.is_floating_point());
+            assert_eq!(lane_count, ret_lane_count);
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
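+            // Alternating operation: even-indexed lanes compute a * b - c,
+            // odd-indexed lanes compute a * b + c.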
+            for idx in 0..lane_count {
+                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+                let res = if idx & 1 == 0 {
+                    fx.bcx.ins().fsub(mul, c_lane)
+                } else {
+                    fx.bcx.ins().fadd(mul, c_lane)
+                };
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        "llvm.x86.fma.vfmsubadd.ps"
+        | "llvm.x86.fma.vfmsubadd.pd"
+        | "llvm.x86.fma.vfmsubadd.ps.256"
+        | "llvm.x86.fma.vfmsubadd.pd.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), c.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            assert!(ret_lane_ty.is_floating_point());
+            assert_eq!(lane_count, ret_lane_count);
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
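+            // Inverse alternation of vfmaddsub: even-indexed lanes compute
+            // a * b + c, odd-indexed lanes compute a * b - c.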
+            for idx in 0..lane_count {
+                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+                let res = if idx & 1 == 0 {
+                    fx.bcx.ins().fadd(mul, c_lane)
+                } else {
+                    fx.bcx.ins().fsub(mul, c_lane)
+                };
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        "llvm.x86.fma.vfnmadd.ps"
+        | "llvm.x86.fma.vfnmadd.pd"
+        | "llvm.x86.fma.vfnmadd.ps.256"
+        | "llvm.x86.fma.vfnmadd.pd.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), c.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            assert!(ret_lane_ty.is_floating_point());
+            assert_eq!(lane_count, ret_lane_count);
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
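+            // Negated multiply-add: every lane computes -(a * b) + c.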
+            for idx in 0..lane_count {
+                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+                let neg_mul = fx.bcx.ins().fneg(mul);
+                let res = fx.bcx.ins().fadd(neg_mul, c_lane);
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        "llvm.x86.sse42.crc32.32.8"
+        | "llvm.x86.sse42.crc32.32.16"
+        | "llvm.x86.sse42.crc32.32.32"
+        | "llvm.x86.sse42.crc32.64.64" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1419&text=_mm_crc32_u32
+            intrinsic_args!(fx, args => (crc, v); intrinsic);
+
+            let crc = crc.load_scalar(fx);
+            let v = v.load_scalar(fx);
+
+            let asm = match intrinsic {
+                "llvm.x86.sse42.crc32.32.8" => "crc32 eax, dl",
+                "llvm.x86.sse42.crc32.32.16" => "crc32 eax, dx",
+                "llvm.x86.sse42.crc32.32.32" => "crc32 eax, edx",
+                "llvm.x86.sse42.crc32.64.64" => "crc32 rax, rdx",
+                _ => unreachable!(),
+            };
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(asm.to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+                        _late: true,
+                        in_value: crc,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+                        value: v,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sse42.pcmpestri128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939
+            intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let la = la.load_scalar(fx);
+            let b = b.load_scalar(fx);
+            let lb = lb.load_scalar(fx);
+
+            let imm8 =
+                if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4].node) {
+                    imm8
+                } else {
+                    fx.tcx
+                        .dcx()
+                        .span_fatal(span, "Index argument for `_mm_cmpestri` is not a constant");
+                };
+
+            let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("imm8 not scalar: {:?}", imm8));
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("pcmpestri xmm0, xmm1, {imm8}"))],
+                &[
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        value: a,
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: b,
+                    },
+                    // Implicit argument to the pcmpestri intrinsic
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+                        value: la,
+                    },
+                    // Implicit argument to the pcmpestri intrinsic
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+                        value: lb,
+                    },
+                    // Implicit result of the pcmpestri intrinsic
+                    CInlineAsmOperand::Out {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
+                        late: true,
+                        place: Some(ret),
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sse42.pcmpestrm128" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm&ig_expand=940
+            intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let la = la.load_scalar(fx);
+            let b = b.load_scalar(fx);
+            let lb = lb.load_scalar(fx);
+
+            let imm8 =
+                if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[4].node) {
+                    imm8
+                } else {
+                    fx.tcx
+                        .dcx()
+                        .span_fatal(span, "Index argument for `_mm_cmpestrm` is not a constant");
+                };
+
+            let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("imm8 not scalar: {:?}", imm8));
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("pcmpestrm xmm0, xmm1, {imm8}"))],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: b,
+                    },
+                    // Implicit argument to the pcmpestrm intrinsic
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
+                        value: la,
+                    },
+                    // Implicit argument to the pcmpestrm intrinsic
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
+                        value: lb,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.pclmulqdq" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772
+            intrinsic_args!(fx, args => (a, b, _imm8); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            let imm8 =
+                if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[2].node) {
+                    imm8
+                } else {
+                    fx.tcx.dcx().span_fatal(
+                        span,
+                        "Index argument for `_mm_clmulepi64_si128` is not a constant",
+                    );
+                };
+
+            let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("imm8 not scalar: {:?}", imm8));
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("pclmulqdq xmm0, xmm1, {imm8}"))],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.aesni.aeskeygenassist" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aeskeygenassist_si128&ig_expand=261
+            intrinsic_args!(fx, args => (a, _imm8); intrinsic);
+
+            let a = a.load_scalar(fx);
+
+            let imm8 =
+                if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1].node) {
+                    imm8
+                } else {
+                    fx.tcx.dcx().span_fatal(
+                        span,
+                        "Index argument for `_mm_aeskeygenassist_si128` is not a constant",
+                    );
+                };
+
+            let imm8 = imm8.try_to_u8().unwrap_or_else(|_| panic!("imm8 not scalar: {:?}", imm8));
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("aeskeygenassist xmm0, xmm0, {imm8}"))],
+                &[CInlineAsmOperand::InOut {
+                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                    _late: true,
+                    in_value: a,
+                    out_place: Some(ret),
+                }],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.aesni.aesimc" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesimc_si128&ig_expand=260
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            let a = a.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesimc xmm0, xmm0".to_string())],
+                &[CInlineAsmOperand::InOut {
+                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                    _late: true,
+                    in_value: a,
+                    out_place: Some(ret),
+                }],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.aesni.aesenc" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128&ig_expand=252
+            intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let round_key = round_key.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesenc xmm0, xmm1".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: round_key,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.aesni.aesenclast" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128&ig_expand=257
+            intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let round_key = round_key.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesenclast xmm0, xmm1".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: round_key,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.aesni.aesdec" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128&ig_expand=242
+            intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let round_key = round_key.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesdec xmm0, xmm1".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: round_key,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.aesni.aesdeclast" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128&ig_expand=247
+            intrinsic_args!(fx, args => (a, round_key); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let round_key = round_key.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("aesdeclast xmm0, xmm1".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        value: round_key,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1rnds4" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32&ig_expand=5877
+            intrinsic_args!(fx, args => (a, b, _func); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            let Some(func) = crate::constant::mir_operand_get_const_val(fx, &args[2].node)
+            else {
+                fx.tcx
+                    .dcx()
+                    .span_fatal(span, "Func argument for `_mm_sha1rnds4_epu32` is not a constant");
+            };
+
+            let func = func.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", func));
+
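+            // The round function selector is an immediate operand of `sha1rnds4`,
+            // so it must be a compile time constant; it is interpolated directly
+            // into the asm template below.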
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("sha1rnds4 xmm1, xmm2, {func}"))],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1msg1" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32&ig_expand=5874
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1msg1 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1msg2" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32&ig_expand=5875
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1msg2 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1nexte" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32&ig_expand=5876
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1nexte xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha256rnds2" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977
+            intrinsic_args!(fx, args => (a, b, k); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+            let k = k.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha256rnds2 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                    // Implicit argument to the sha256rnds2 instruction
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
+                        value: k,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha256msg1" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg1_epu32&ig_expand=5975
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha256msg1 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha256msg2" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256msg2_epu32&ig_expand=5976
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha256msg2 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.avx.ptestz.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256&ig_expand=6945
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.i64);
+            assert_eq!(ret.layout().ty, fx.tcx.types.i32);
+            assert_eq!(lane_count, 4);
+
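+            // `vptest` sets ZF iff `a & b` is all zeros. Emulate it by ANDing the
+            // lanes pairwise, ORing the partial results together and comparing the
+            // combined value against zero.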
+            let a_lane0 = a.value_lane(fx, 0).load_scalar(fx);
+            let a_lane1 = a.value_lane(fx, 1).load_scalar(fx);
+            let a_lane2 = a.value_lane(fx, 2).load_scalar(fx);
+            let a_lane3 = a.value_lane(fx, 3).load_scalar(fx);
+            let b_lane0 = b.value_lane(fx, 0).load_scalar(fx);
+            let b_lane1 = b.value_lane(fx, 1).load_scalar(fx);
+            let b_lane2 = b.value_lane(fx, 2).load_scalar(fx);
+            let b_lane3 = b.value_lane(fx, 3).load_scalar(fx);
+
+            let zero0 = fx.bcx.ins().band(a_lane0, b_lane0);
+            let zero1 = fx.bcx.ins().band(a_lane1, b_lane1);
+            let zero2 = fx.bcx.ins().band(a_lane2, b_lane2);
+            let zero3 = fx.bcx.ins().band(a_lane3, b_lane3);
+
+            let all_zero0 = fx.bcx.ins().bor(zero0, zero1);
+            let all_zero1 = fx.bcx.ins().bor(zero2, zero3);
+            let all_zero = fx.bcx.ins().bor(all_zero0, all_zero1);
+
+            let res = fx.bcx.ins().icmp_imm(IntCC::Equal, all_zero, 0);
+            let res = CValue::by_val(
+                fx.bcx.ins().uextend(types::I32, res),
+                fx.layout_of(fx.tcx.types.i32),
+            );
+            ret.write_cvalue(fx, res);
+        }
+
+        _ => {
+            fx.tcx
+                .dcx()
+                .warn(format!("unsupported x86 llvm intrinsic {}; replacing with trap", intrinsic));
+            crate::trap::trap_unimplemented(fx, intrinsic);
+            return;
+        }
+    }
+
+    let dest = target.expect("all llvm intrinsics used by stdlib should return");
+    let ret_block = fx.get_block(dest);
+    fx.bcx.ins().jump(ret_block, &[]);
+}
+
+// llvm.x86.avx2.vperm2i128
+// llvm.x86.ssse3.pshuf.b.128
+// llvm.x86.avx2.pshuf.b
+
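+/// Chain two checked integer operations so a carry/borrow-in can be threaded
+/// through, returning `(carry_or_borrow_out, result)`.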
+fn llvm_add_sub<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    cb_in: Value,
+    a: CValue<'tcx>,
+    b: CValue<'tcx>,
+) -> (Value, Value) {
+    assert_eq!(a.layout().ty, b.layout().ty);
+
+    // a + b -> c + first intermediate carry or borrow respectively
+    let int0 = crate::num::codegen_checked_int_binop(fx, bin_op, a, b);
+    let c = int0.value_field(fx, FieldIdx::ZERO);
+    let cb0 = int0.value_field(fx, FieldIdx::new(1)).load_scalar(fx);
+
+    // c + cb_in -> c + second intermediate carry or borrow respectively
+    let clif_ty = fx.clif_type(a.layout().ty).unwrap();
+    let cb_in_as_int = fx.bcx.ins().uextend(clif_ty, cb_in);
+    let cb_in_as_int = CValue::by_val(cb_in_as_int, fx.layout_of(a.layout().ty));
+    let int1 = crate::num::codegen_checked_int_binop(fx, bin_op, c, cb_in_as_int);
+    let (c, cb1) = int1.load_scalar_pair(fx);
+
+    // carry0 | carry1 -> carry or borrow respectively
+    let cb_out = fx.bcx.ins().bor(cb0, cb1);
+
+    (cb_out, c)
+}
+
+enum PackSize {
+    U8,
+    U16,
+    S8,
+    S16,
+}
+
+impl PackSize {
+    fn ret_clif_type(&self) -> Type {
+        match self {
+            Self::U8 | Self::S8 => types::I8,
+            Self::U16 | Self::S16 => types::I16,
+        }
+    }
+    fn src_clif_type(&self) -> Type {
+        match self {
+            Self::U8 | Self::S8 => types::I16,
+            Self::U16 | Self::S16 => types::I32,
+        }
+    }
+    fn src_ty<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Ty<'tcx> {
+        match self {
+            Self::U8 | Self::S8 => tcx.types.i16,
+            Self::U16 | Self::S16 => tcx.types.i32,
+        }
+    }
+    fn ret_ty<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Ty<'tcx> {
+        match self {
+            Self::U8 => tcx.types.u8,
+            Self::S8 => tcx.types.i8,
+            Self::U16 => tcx.types.u16,
+            Self::S16 => tcx.types.i16,
+        }
+    }
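+    // `max`/`min` return the saturation bounds encoded as the two's-complement
+    // bit pattern at the source lane width, which is the form `iconst` expects.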
+    fn max(&self) -> i64 {
+        match self {
+            Self::U8 => u8::MAX as u64 as i64,
+            Self::S8 => i8::MAX as u8 as u64 as i64,
+            Self::U16 => u16::MAX as u64 as i64,
+            Self::S16 => i16::MAX as u16 as u64 as i64,
+        }
+    }
+    fn min(&self) -> i64 {
+        match self {
+            Self::U8 | Self::U16 => 0,
+            Self::S8 => i16::from(i8::MIN) as u16 as i64,
+            Self::S16 => i32::from(i16::MIN) as u32 as i64,
+        }
+    }
+}
+
+enum PackWidth {
+    Sse = 1,
+    Avx = 2,
+}
+impl PackWidth {
+    fn divisor(&self) -> u64 {
+        match self {
+            Self::Sse => 1,
+            Self::Avx => 2,
+        }
+    }
+}
+
+/// Implement an x86 pack instruction, as used by the `_mm{,256}_pack{us,s}_epi{16,32}` intrinsics.
+/// Validated for correctness against LLVM, see commit `c8f5d35508e062bd2d95e6c03429bfec831db6d3`.
+fn pack_instruction<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    a: CValue<'tcx>,
+    b: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    ret_size: PackSize,
+    width: PackWidth,
+) {
+    assert_eq!(a.layout(), b.layout());
+    let layout = a.layout();
+
+    let (src_lane_count, src_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+    assert_eq!(src_lane_ty, ret_size.src_ty(fx.tcx));
+    assert_eq!(ret_lane_ty, ret_size.ret_ty(fx.tcx));
+    assert_eq!(src_lane_count * 2, ret_lane_count);
+
+    let min = fx.bcx.ins().iconst(ret_size.src_clif_type(), ret_size.min());
+    let max = fx.bcx.ins().iconst(ret_size.src_clif_type(), ret_size.max());
+    let ret_lane_layout = fx.layout_of(ret_size.ret_ty(fx.tcx));
+
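+    // Saturate and narrow a run of `source` lanes into the matching chunk of
+    // `ret`: clamp each lane into the destination type's [min, max] range, then
+    // truncate it to the destination lane width.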
+    let mut round = |source: CValue<'tcx>, source_offset: u64, dest_offset: u64| {
+        let step_amount = src_lane_count / width.divisor();
+        let dest_offset = step_amount * dest_offset;
+        for idx in 0..step_amount {
+            let lane = source.value_lane(fx, step_amount * source_offset + idx).load_scalar(fx);
+            let sat = fx.bcx.ins().smax(lane, min);
+            let sat = match ret_size {
+                PackSize::U8 | PackSize::U16 => fx.bcx.ins().umin(sat, max),
+                PackSize::S8 | PackSize::S16 => fx.bcx.ins().smin(sat, max),
+            };
+            let res = fx.bcx.ins().ireduce(ret_size.ret_clif_type(), sat);
+            let res_lane = CValue::by_val(res, ret_lane_layout);
+            ret.place_lane(fx, dest_offset + idx).write_cvalue(fx, res_lane);
+        }
+    };
+
+    round(a, 0, 0);
+    round(b, 0, 1);
+
+    if let PackWidth::Avx = width {
+        round(a, 1, 2);
+        round(b, 1, 3);
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
new file mode 100644
index 00000000000..cafdc051db5
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -0,0 +1,1281 @@
+//! Codegen of intrinsics. This includes `extern "rust-intrinsic"`, `extern "platform-intrinsic"`
+//! and LLVM intrinsics that have symbol names starting with `llvm.`.
+
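+/// Destructure the intrinsic's argument list into the given named operands,
+/// codegenning each one; calls `bug_on_incorrect_arg_count` if the argument
+/// count doesn't match.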
+macro_rules! intrinsic_args {
+    ($fx:expr, $args:expr => ($($arg:tt),*); $intrinsic:expr) => {
+        #[allow(unused_parens)]
+        let ($($arg),*) = if let [$($arg),*] = $args {
+            ($(codegen_operand($fx, &($arg).node)),*)
+        } else {
+            $crate::intrinsics::bug_on_incorrect_arg_count($intrinsic);
+        };
+    }
+}
+
+mod llvm;
+mod llvm_aarch64;
+mod llvm_x86;
+mod simd;
+
+use cranelift_codegen::ir::AtomicRmwOp;
+use rustc_middle::ty;
+use rustc_middle::ty::layout::{HasParamEnv, ValidityRequirement};
+use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths};
+use rustc_middle::ty::GenericArgsRef;
+use rustc_span::source_map::Spanned;
+use rustc_span::symbol::{sym, Symbol};
+
+pub(crate) use self::llvm::codegen_llvm_intrinsic_call;
+use crate::cast::clif_intcast;
+use crate::prelude::*;
+
+fn bug_on_incorrect_arg_count(intrinsic: impl std::fmt::Display) -> ! {
+    bug!("wrong number of args for intrinsic {}", intrinsic);
+}
+
+fn report_atomic_type_validation_error<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    intrinsic: Symbol,
+    span: Span,
+    ty: Ty<'tcx>,
+) {
+    fx.tcx.dcx().span_err(
+        span,
+        format!(
+            "`{}` intrinsic: expected basic integer or raw pointer type, found `{:?}`",
+            intrinsic, ty
+        ),
+    );
+    // Prevent verifier error
+    fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+}
+
+pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Type {
+    let (element, count) = match layout.abi {
+        Abi::Vector { element, count } => (element, count),
+        _ => unreachable!(),
+    };
+
+    scalar_to_clif_type(tcx, element).by(u32::try_from(count).unwrap()).unwrap()
+}
+
+fn simd_for_each_lane<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    val: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value) -> Value,
+) {
+    let layout = val.layout();
+
+    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+    let lane_layout = fx.layout_of(lane_ty);
+    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+    let ret_lane_layout = fx.layout_of(ret_lane_ty);
+    assert_eq!(lane_count, ret_lane_count);
+
+    for lane_idx in 0..lane_count {
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+
+        let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, lane);
+        let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
+    }
+}
+
+fn simd_pair_for_each_lane_typed<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    x: CValue<'tcx>,
+    y: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, CValue<'tcx>, CValue<'tcx>) -> CValue<'tcx>,
+) {
+    assert_eq!(x.layout(), y.layout());
+    let layout = x.layout();
+
+    let (lane_count, _lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+    let (ret_lane_count, _ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+    assert_eq!(lane_count, ret_lane_count);
+
+    for lane_idx in 0..lane_count {
+        let x_lane = x.value_lane(fx, lane_idx);
+        let y_lane = y.value_lane(fx, lane_idx);
+
+        let res_lane = f(fx, x_lane, y_lane);
+
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
+    }
+}
+
+fn simd_pair_for_each_lane<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    x: CValue<'tcx>,
+    y: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value) -> Value,
+) {
+    assert_eq!(x.layout(), y.layout());
+    let layout = x.layout();
+
+    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+    let lane_layout = fx.layout_of(lane_ty);
+    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+    let ret_lane_layout = fx.layout_of(ret_lane_ty);
+    assert_eq!(lane_count, ret_lane_count);
+
+    for lane_idx in 0..lane_count {
+        let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx);
+        let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx);
+
+        let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, x_lane, y_lane);
+        let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
+    }
+}
+
+fn simd_horizontal_pair_for_each_lane<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    x: CValue<'tcx>,
+    y: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value) -> Value,
+) {
+    assert_eq!(x.layout(), y.layout());
+    let layout = x.layout();
+
+    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+    let lane_layout = fx.layout_of(lane_ty);
+    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+    let ret_lane_layout = fx.layout_of(ret_lane_ty);
+    assert_eq!(lane_count, ret_lane_count);
+
+    for lane_idx in 0..lane_count {
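+        // Lanes in the lower half of the result are built from adjacent pairs of
+        // `x`, lanes in the upper half from adjacent pairs of `y`.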
+        let src = if lane_idx < (lane_count / 2) { x } else { y };
+        let src_idx = lane_idx % (lane_count / 2);
+
+        let lhs_lane = src.value_lane(fx, src_idx * 2).load_scalar(fx);
+        let rhs_lane = src.value_lane(fx, src_idx * 2 + 1).load_scalar(fx);
+
+        let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, lhs_lane, rhs_lane);
+        let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
+    }
+}
+
+fn simd_trio_for_each_lane<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    x: CValue<'tcx>,
+    y: CValue<'tcx>,
+    z: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Ty<'tcx>, Value, Value, Value) -> Value,
+) {
+    assert_eq!(x.layout(), y.layout());
+    let layout = x.layout();
+
+    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+    let lane_layout = fx.layout_of(lane_ty);
+    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+    let ret_lane_layout = fx.layout_of(ret_lane_ty);
+    assert_eq!(lane_count, ret_lane_count);
+
+    for lane_idx in 0..lane_count {
+        let x_lane = x.value_lane(fx, lane_idx).load_scalar(fx);
+        let y_lane = y.value_lane(fx, lane_idx).load_scalar(fx);
+        let z_lane = z.value_lane(fx, lane_idx).load_scalar(fx);
+
+        let res_lane = f(fx, lane_layout.ty, ret_lane_layout.ty, x_lane, y_lane, z_lane);
+        let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+        ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
+    }
+}
+
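+/// Fold all lanes of `val` into a single value using `f`, seeding the fold with
+/// `acc` if provided and with lane 0 otherwise.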
+fn simd_reduce<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    val: CValue<'tcx>,
+    acc: Option<Value>,
+    ret: CPlace<'tcx>,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Ty<'tcx>, Value, Value) -> Value,
+) {
+    let (lane_count, lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+    let lane_layout = fx.layout_of(lane_ty);
+    assert_eq!(lane_layout, ret.layout());
+
+    let (mut res_val, start_lane) =
+        if let Some(acc) = acc { (acc, 0) } else { (val.value_lane(fx, 0).load_scalar(fx), 1) };
+    for lane_idx in start_lane..lane_count {
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+        res_val = f(fx, lane_layout.ty, res_val, lane);
+    }
+    let res = CValue::by_val(res_val, lane_layout);
+    ret.write_cvalue(fx, res);
+}
+
+// FIXME move all uses to `simd_reduce`
+fn simd_reduce_bool<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    val: CValue<'tcx>,
+    ret: CPlace<'tcx>,
+    f: &dyn Fn(&mut FunctionCx<'_, '_, 'tcx>, Value, Value) -> Value,
+) {
+    let (lane_count, _lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+    assert!(ret.layout().ty.is_bool());
+
+    let res_val = val.value_lane(fx, 0).load_scalar(fx);
+    let mut res_val = fx.bcx.ins().band_imm(res_val, 1); // mask to boolean
+    for lane_idx in 1..lane_count {
+        let lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+        let lane = fx.bcx.ins().band_imm(lane, 1); // mask to boolean
+        res_val = f(fx, res_val, lane);
+    }
+    let res_val = if fx.bcx.func.dfg.value_type(res_val) != types::I8 {
+        fx.bcx.ins().ireduce(types::I8, res_val)
+    } else {
+        res_val
+    };
+    let res = CValue::by_val(res_val, ret.layout());
+    ret.write_cvalue(fx, res);
+}
+
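+/// Expand a boolean value into a zero or all-ones mask of the given type. For
+/// float types the mask is built in the equally sized integer type and bitcast
+/// back afterwards.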
+fn bool_to_zero_or_max_uint<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    ty: Ty<'tcx>,
+    val: Value,
+) -> Value {
+    let ty = fx.clif_type(ty).unwrap();
+
+    let int_ty = match ty {
+        types::F32 => types::I32,
+        types::F64 => types::I64,
+        ty => ty,
+    };
+
+    let mut res = fx.bcx.ins().bmask(int_ty, val);
+
+    if ty.is_float() {
+        res = codegen_bitcast(fx, ty, res);
+    }
+
+    res
+}
+
+pub(crate) fn codegen_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    instance: Instance<'tcx>,
+    args: &[Spanned<mir::Operand<'tcx>>],
+    destination: CPlace<'tcx>,
+    target: Option<BasicBlock>,
+    source_info: mir::SourceInfo,
+) -> Result<(), Instance<'tcx>> {
+    let intrinsic = fx.tcx.item_name(instance.def_id());
+    let instance_args = instance.args;
+
+    if intrinsic.as_str().starts_with("simd_") {
+        self::simd::codegen_simd_intrinsic_call(
+            fx,
+            intrinsic,
+            instance_args,
+            args,
+            destination,
+            target.expect("target for simd intrinsic"),
+            source_info.span,
+        );
+    } else if codegen_float_intrinsic_call(fx, intrinsic, args, destination) {
+        let ret_block = fx.get_block(target.expect("target for float intrinsic"));
+        fx.bcx.ins().jump(ret_block, &[]);
+    } else {
+        codegen_regular_intrinsic_call(
+            fx,
+            instance,
+            intrinsic,
+            instance_args,
+            args,
+            destination,
+            target,
+            source_info,
+        )?;
+    }
+    Ok(())
+}
+
+fn codegen_float_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    intrinsic: Symbol,
+    args: &[Spanned<mir::Operand<'tcx>>],
+    ret: CPlace<'tcx>,
+) -> bool {
+    let (name, arg_count, ty, clif_ty) = match intrinsic {
+        sym::expf32 => ("expf", 1, fx.tcx.types.f32, types::F32),
+        sym::expf64 => ("exp", 1, fx.tcx.types.f64, types::F64),
+        sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32, types::F32),
+        sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64, types::F64),
+        sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32, types::F32),
+        sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64, types::F64),
+        sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32, types::F32), // compiler-builtins
+        sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64, types::F64), // compiler-builtins
+        sym::powf32 => ("powf", 2, fx.tcx.types.f32, types::F32),
+        sym::powf64 => ("pow", 2, fx.tcx.types.f64, types::F64),
+        sym::logf32 => ("logf", 1, fx.tcx.types.f32, types::F32),
+        sym::logf64 => ("log", 1, fx.tcx.types.f64, types::F64),
+        sym::log2f32 => ("log2f", 1, fx.tcx.types.f32, types::F32),
+        sym::log2f64 => ("log2", 1, fx.tcx.types.f64, types::F64),
+        sym::log10f32 => ("log10f", 1, fx.tcx.types.f32, types::F32),
+        sym::log10f64 => ("log10", 1, fx.tcx.types.f64, types::F64),
+        sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32, types::F32),
+        sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64, types::F64),
+        sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32),
+        sym::fmaf64 => ("fma", 3, fx.tcx.types.f64, types::F64),
+        sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32, types::F32),
+        sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64, types::F64),
+        sym::floorf32 => ("floorf", 1, fx.tcx.types.f32, types::F32),
+        sym::floorf64 => ("floor", 1, fx.tcx.types.f64, types::F64),
+        sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32, types::F32),
+        sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64, types::F64),
+        sym::truncf32 => ("truncf", 1, fx.tcx.types.f32, types::F32),
+        sym::truncf64 => ("trunc", 1, fx.tcx.types.f64, types::F64),
+        sym::rintf32 => ("rintf", 1, fx.tcx.types.f32, types::F32),
+        sym::rintf64 => ("rint", 1, fx.tcx.types.f64, types::F64),
+        sym::roundf32 => ("roundf", 1, fx.tcx.types.f32, types::F32),
+        sym::roundf64 => ("round", 1, fx.tcx.types.f64, types::F64),
+        sym::roundevenf32 => ("roundevenf", 1, fx.tcx.types.f32, types::F32),
+        sym::roundevenf64 => ("roundeven", 1, fx.tcx.types.f64, types::F64),
+        sym::nearbyintf32 => ("nearbyintf", 1, fx.tcx.types.f32, types::F32),
+        sym::nearbyintf64 => ("nearbyint", 1, fx.tcx.types.f64, types::F64),
+        sym::sinf32 => ("sinf", 1, fx.tcx.types.f32, types::F32),
+        sym::sinf64 => ("sin", 1, fx.tcx.types.f64, types::F64),
+        sym::cosf32 => ("cosf", 1, fx.tcx.types.f32, types::F32),
+        sym::cosf64 => ("cos", 1, fx.tcx.types.f64, types::F64),
+        _ => return false,
+    };
+
+    if args.len() != arg_count {
+        bug!("wrong number of args for intrinsic {:?}", intrinsic);
+    }
+
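+    // Stack-allocated argument arrays for arities 1 to 3; `args` then borrows
+    // whichever one was filled as a uniform slice.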
+    let (a, b, c);
+    let args = match args {
+        [x] => {
+            a = [codegen_operand(fx, &x.node).load_scalar(fx)];
+            &a as &[_]
+        }
+        [x, y] => {
+            b = [
+                codegen_operand(fx, &x.node).load_scalar(fx),
+                codegen_operand(fx, &y.node).load_scalar(fx),
+            ];
+            &b
+        }
+        [x, y, z] => {
+            c = [
+                codegen_operand(fx, &x.node).load_scalar(fx),
+                codegen_operand(fx, &y.node).load_scalar(fx),
+                codegen_operand(fx, &z.node).load_scalar(fx),
+            ];
+            &c
+        }
+        _ => unreachable!(),
+    };
+
+    let layout = fx.layout_of(ty);
+    let res = match intrinsic {
+        sym::fmaf32 | sym::fmaf64 => {
+            CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout)
+        }
+        sym::copysignf32 | sym::copysignf64 => {
+            CValue::by_val(fx.bcx.ins().fcopysign(args[0], args[1]), layout)
+        }
+        sym::fabsf32
+        | sym::fabsf64
+        | sym::floorf32
+        | sym::floorf64
+        | sym::ceilf32
+        | sym::ceilf64
+        | sym::truncf32
+        | sym::truncf64
+        | sym::nearbyintf32
+        | sym::nearbyintf64
+        | sym::sqrtf32
+        | sym::sqrtf64 => {
+            let val = match intrinsic {
+                sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
+                sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
+                sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
+                sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
+                sym::nearbyintf32 | sym::nearbyintf64 => fx.bcx.ins().nearest(args[0]),
+                sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
+                _ => unreachable!(),
+            };
+
+            CValue::by_val(val, layout)
+        }
+
+        // These intrinsics aren't supported natively by Cranelift.
+        // Lower them to a libcall.
+        sym::powif32 | sym::powif64 => {
+            let input_tys: Vec<_> = vec![AbiParam::new(clif_ty), AbiParam::new(types::I32)];
+            let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
+            CValue::by_val(ret_val, fx.layout_of(ty))
+        }
+        _ => {
+            let input_tys: Vec<_> = args.iter().map(|_| AbiParam::new(clif_ty)).collect();
+            let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
+            CValue::by_val(ret_val, fx.layout_of(ty))
+        }
+    };
+
+    ret.write_cvalue(fx, res);
+
+    true
+}
+
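+/// Returns `Err(instance)` when this backend has no special lowering for the
+/// intrinsic, so that the caller can fall back to codegenning it as a regular
+/// function call instead.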
+fn codegen_regular_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    instance: Instance<'tcx>,
+    intrinsic: Symbol,
+    generic_args: GenericArgsRef<'tcx>,
+    args: &[Spanned<mir::Operand<'tcx>>],
+    ret: CPlace<'tcx>,
+    destination: Option<BasicBlock>,
+    source_info: mir::SourceInfo,
+) -> Result<(), Instance<'tcx>> {
+    assert_eq!(generic_args, instance.args);
+    let usize_layout = fx.layout_of(fx.tcx.types.usize);
+
+    match intrinsic {
+        sym::abort => {
+            fx.bcx.ins().trap(TrapCode::User(0));
+            return Ok(());
+        }
+        sym::likely | sym::unlikely => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
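+            // Cranelift has no branch prediction hints, so these are no-ops that
+            // simply forward their argument.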
+            ret.write_cvalue(fx, a);
+        }
+        sym::breakpoint => {
+            intrinsic_args!(fx, args => (); intrinsic);
+
+            fx.bcx.ins().debugtrap();
+        }
+        sym::copy => {
+            intrinsic_args!(fx, args => (src, dst, count); intrinsic);
+            let src = src.load_scalar(fx);
+            let dst = dst.load_scalar(fx);
+            let count = count.load_scalar(fx);
+
+            let elem_ty = generic_args.type_at(0);
+            let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
+            assert_eq!(args.len(), 3);
+            let byte_amount =
+                if elem_size != 1 { fx.bcx.ins().imul_imm(count, elem_size as i64) } else { count };
+
+            // FIXME emit_small_memmove
+            fx.bcx.call_memmove(fx.target_config, dst, src, byte_amount);
+        }
+        sym::volatile_copy_memory | sym::volatile_copy_nonoverlapping_memory => {
+            // NOTE: the volatile variants have src and dst swapped
+            intrinsic_args!(fx, args => (dst, src, count); intrinsic);
+            let dst = dst.load_scalar(fx);
+            let src = src.load_scalar(fx);
+            let count = count.load_scalar(fx);
+
+            let elem_ty = generic_args.type_at(0);
+            let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
+            assert_eq!(args.len(), 3);
+            let byte_amount =
+                if elem_size != 1 { fx.bcx.ins().imul_imm(count, elem_size as i64) } else { count };
+
+            // FIXME make the copy actually volatile when using emit_small_mem{cpy,move}
+            if intrinsic == sym::volatile_copy_nonoverlapping_memory {
+                // FIXME emit_small_memcpy
+                fx.bcx.call_memcpy(fx.target_config, dst, src, byte_amount);
+            } else {
+                // FIXME emit_small_memmove
+                fx.bcx.call_memmove(fx.target_config, dst, src, byte_amount);
+            }
+        }
+        sym::size_of_val => {
+            intrinsic_args!(fx, args => (ptr); intrinsic);
+
+            let layout = fx.layout_of(generic_args.type_at(0));
+            // Note: Can't use is_unsized here as truly unsized types need to take the fixed size
+            // branch
+            let meta = if let Abi::ScalarPair(_, _) = ptr.layout().abi {
+                Some(ptr.load_scalar_pair(fx).1)
+            } else {
+                None
+            };
+            let (size, _align) = crate::unsize::size_and_align_of(fx, layout, meta);
+            ret.write_cvalue(fx, CValue::by_val(size, usize_layout));
+        }
+        sym::min_align_of_val => {
+            intrinsic_args!(fx, args => (ptr); intrinsic);
+
+            let layout = fx.layout_of(generic_args.type_at(0));
+            // Note: Can't use is_unsized here as truly unsized types need to take the fixed size
+            // branch
+            let meta = if let Abi::ScalarPair(_, _) = ptr.layout().abi {
+                Some(ptr.load_scalar_pair(fx).1)
+            } else {
+                None
+            };
+            let (_size, align) = crate::unsize::size_and_align_of(fx, layout, meta);
+            ret.write_cvalue(fx, CValue::by_val(align, usize_layout));
+        }
+
+        sym::vtable_size => {
+            intrinsic_args!(fx, args => (vtable); intrinsic);
+            let vtable = vtable.load_scalar(fx);
+
+            let size = crate::vtable::size_of_obj(fx, vtable);
+            ret.write_cvalue(fx, CValue::by_val(size, usize_layout));
+        }
+
+        sym::vtable_align => {
+            intrinsic_args!(fx, args => (vtable); intrinsic);
+            let vtable = vtable.load_scalar(fx);
+
+            let align = crate::vtable::min_align_of_obj(fx, vtable);
+            ret.write_cvalue(fx, CValue::by_val(align, usize_layout));
+        }
+
+        sym::exact_div => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            // FIXME trap on inexact
+            let res = crate::num::codegen_int_binop(fx, BinOp::Div, x, y);
+            ret.write_cvalue(fx, res);
+        }
+        sym::saturating_add | sym::saturating_sub => {
+            intrinsic_args!(fx, args => (lhs, rhs); intrinsic);
+
+            assert_eq!(lhs.layout().ty, rhs.layout().ty);
+            let bin_op = match intrinsic {
+                sym::saturating_add => BinOp::Add,
+                sym::saturating_sub => BinOp::Sub,
+                _ => unreachable!(),
+            };
+
+            let res = crate::num::codegen_saturating_int_binop(fx, bin_op, lhs, rhs);
+            ret.write_cvalue(fx, res);
+        }
+        sym::rotate_left => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+            let y = y.load_scalar(fx);
+
+            let layout = x.layout();
+            let x = x.load_scalar(fx);
+            let res = fx.bcx.ins().rotl(x, y);
+            ret.write_cvalue(fx, CValue::by_val(res, layout));
+        }
+        sym::rotate_right => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+            let y = y.load_scalar(fx);
+
+            let layout = x.layout();
+            let x = x.load_scalar(fx);
+            let res = fx.bcx.ins().rotr(x, y);
+            ret.write_cvalue(fx, CValue::by_val(res, layout));
+        }
+
+        // The only difference between `offset` and `arith_offset` is regarding UB.
+        // Because Cranelift doesn't have UB, both are codegen'ed the same way.
+        sym::arith_offset => {
+            intrinsic_args!(fx, args => (base, offset); intrinsic);
+            let offset = offset.load_scalar(fx);
+
+            let pointee_ty = base.layout().ty.builtin_deref(true).unwrap();
+            let pointee_size = fx.layout_of(pointee_ty).size.bytes();
+            let ptr_diff = if pointee_size != 1 {
+                fx.bcx.ins().imul_imm(offset, pointee_size as i64)
+            } else {
+                offset
+            };
+            let base_val = base.load_scalar(fx);
+            let res = fx.bcx.ins().iadd(base_val, ptr_diff);
+            ret.write_cvalue(fx, CValue::by_val(res, base.layout()));
+        }
+
+        sym::ptr_mask => {
+            intrinsic_args!(fx, args => (ptr, mask); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+            let mask = mask.load_scalar(fx);
+            // The masked pointer is the intrinsic's return value; it must be
+            // written back to `ret`.
+            let res = fx.bcx.ins().band(ptr, mask);
+            ret.write_cvalue(fx, CValue::by_val(res, ret.layout()));
+        }
+
+        sym::write_bytes | sym::volatile_set_memory => {
+            intrinsic_args!(fx, args => (dst, val, count); intrinsic);
+            let val = val.load_scalar(fx);
+            let count = count.load_scalar(fx);
+
+            let pointee_ty = dst.layout().ty.builtin_deref(true).unwrap();
+            let pointee_size = fx.layout_of(pointee_ty).size.bytes();
+            let count = if pointee_size != 1 {
+                fx.bcx.ins().imul_imm(count, pointee_size as i64)
+            } else {
+                count
+            };
+            let dst_ptr = dst.load_scalar(fx);
+            // FIXME make the memset actually volatile when switching to emit_small_memset
+            // FIXME use emit_small_memset
+            fx.bcx.call_memset(fx.target_config, dst_ptr, val, count);
+        }
+        sym::ctlz | sym::ctlz_nonzero => {
+            intrinsic_args!(fx, args => (arg); intrinsic);
+            let val = arg.load_scalar(fx);
+
+            // FIXME trap on `ctlz_nonzero` with zero arg.
+            let res = fx.bcx.ins().clz(val);
+            let res = clif_intcast(fx, res, types::I32, false);
+            let res = CValue::by_val(res, ret.layout());
+            ret.write_cvalue(fx, res);
+        }
+        sym::cttz | sym::cttz_nonzero => {
+            intrinsic_args!(fx, args => (arg); intrinsic);
+            let val = arg.load_scalar(fx);
+
+            // FIXME trap on `cttz_nonzero` with zero arg.
+            let res = fx.bcx.ins().ctz(val);
+            let res = clif_intcast(fx, res, types::I32, false);
+            let res = CValue::by_val(res, ret.layout());
+            ret.write_cvalue(fx, res);
+        }
+        sym::ctpop => {
+            intrinsic_args!(fx, args => (arg); intrinsic);
+            let val = arg.load_scalar(fx);
+
+            let res = fx.bcx.ins().popcnt(val);
+            let res = clif_intcast(fx, res, types::I32, false);
+            let res = CValue::by_val(res, ret.layout());
+            ret.write_cvalue(fx, res);
+        }
+        sym::bitreverse => {
+            intrinsic_args!(fx, args => (arg); intrinsic);
+            let val = arg.load_scalar(fx);
+
+            let res = fx.bcx.ins().bitrev(val);
+            let res = CValue::by_val(res, arg.layout());
+            ret.write_cvalue(fx, res);
+        }
+        sym::bswap => {
+            intrinsic_args!(fx, args => (arg); intrinsic);
+            let val = arg.load_scalar(fx);
+
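+            // Swapping the bytes of a single byte is a no-op; Cranelift's `bswap`
+            // doesn't accept `i8`, hence the special case.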
+            let res = if fx.bcx.func.dfg.value_type(val) == types::I8 {
+                val
+            } else {
+                fx.bcx.ins().bswap(val)
+            };
+            let res = CValue::by_val(res, arg.layout());
+            ret.write_cvalue(fx, res);
+        }
+        sym::assert_inhabited | sym::assert_zero_valid | sym::assert_mem_uninitialized_valid => {
+            intrinsic_args!(fx, args => (); intrinsic);
+
+            let ty = generic_args.type_at(0);
+
+            let requirement = ValidityRequirement::from_intrinsic(intrinsic);
+
+            if let Some(requirement) = requirement {
+                let do_panic = !fx
+                    .tcx
+                    .check_validity_requirement((requirement, fx.param_env().and(ty)))
+                    .expect("expect to have layout during codegen");
+
+                if do_panic {
+                    let layout = fx.layout_of(ty);
+                    let msg_str = with_no_visible_paths!({
+                        with_no_trimmed_paths!({
+                            if layout.abi.is_uninhabited() {
+                                // Use this error even for the other intrinsics as it is more precise.
+                                format!("attempted to instantiate uninhabited type `{}`", ty)
+                            } else if intrinsic == sym::assert_zero_valid {
+                                format!(
+                                    "attempted to zero-initialize type `{}`, which is invalid",
+                                    ty
+                                )
+                            } else {
+                                format!(
+                                    "attempted to leave type `{}` uninitialized, which is invalid",
+                                    ty
+                                )
+                            }
+                        })
+                    });
+                    crate::base::codegen_panic_nounwind(fx, &msg_str, Some(source_info.span));
+                    return Ok(());
+                }
+            }
+        }
+
+        sym::volatile_load | sym::unaligned_volatile_load => {
+            intrinsic_args!(fx, args => (ptr); intrinsic);
+
+            // Cranelift treats loads as volatile by default
+            // FIXME correctly handle unaligned_volatile_load
+            let inner_layout = fx.layout_of(ptr.layout().ty.builtin_deref(true).unwrap());
+            let val = CValue::by_ref(Pointer::new(ptr.load_scalar(fx)), inner_layout);
+            ret.write_cvalue(fx, val);
+        }
+        sym::volatile_store | sym::unaligned_volatile_store | sym::nontemporal_store => {
+            intrinsic_args!(fx, args => (ptr, val); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            // Cranelift treats stores as volatile by default
+            // FIXME correctly handle unaligned_volatile_store
+            // FIXME actually do nontemporal stores if requested
+            let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout());
+            dest.write_cvalue(fx, val);
+        }
+
+        sym::pref_align_of
+        | sym::needs_drop
+        | sym::type_id
+        | sym::type_name
+        | sym::variant_count => {
+            intrinsic_args!(fx, args => (); intrinsic);
+
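+            // All of these are compile-time constants of the monomorphized type,
+            // so let the const evaluator produce the value and codegen the result.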
+            let const_val = fx
+                .tcx
+                .const_eval_instance(ParamEnv::reveal_all(), instance, source_info.span)
+                .unwrap();
+            let val = crate::constant::codegen_const_value(fx, const_val, ret.layout().ty);
+            ret.write_cvalue(fx, val);
+        }
+
+        sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
+            intrinsic_args!(fx, args => (ptr, base); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+            let base = base.load_scalar(fx);
+            let ty = generic_args.type_at(0);
+
+            let pointee_size: u64 = fx.layout_of(ty).size.bytes();
+            let diff_bytes = fx.bcx.ins().isub(ptr, base);
+            // FIXME this can be an exact division.
+            let val = if intrinsic == sym::ptr_offset_from_unsigned {
+                let usize_layout = fx.layout_of(fx.tcx.types.usize);
+                // Because diff_bytes ULE isize::MAX, this would be fine as signed,
+                // but unsigned is slightly easier to codegen, so might as well.
+                CValue::by_val(fx.bcx.ins().udiv_imm(diff_bytes, pointee_size as i64), usize_layout)
+            } else {
+                let isize_layout = fx.layout_of(fx.tcx.types.isize);
+                CValue::by_val(fx.bcx.ins().sdiv_imm(diff_bytes, pointee_size as i64), isize_layout)
+            };
+            ret.write_cvalue(fx, val);
+        }
+
+        sym::caller_location => {
+            intrinsic_args!(fx, args => (); intrinsic);
+
+            let caller_location = fx.get_caller_location(source_info);
+            ret.write_cvalue(fx, caller_location);
+        }
+
+        _ if intrinsic.as_str().starts_with("atomic_fence") => {
+            intrinsic_args!(fx, args => (); intrinsic);
+
+            fx.bcx.ins().fence();
+        }
+        _ if intrinsic.as_str().starts_with("atomic_singlethreadfence") => {
+            intrinsic_args!(fx, args => (); intrinsic);
+
+            // FIXME use a compiler fence once Cranelift supports it
+            fx.bcx.ins().fence();
+        }
+        _ if intrinsic.as_str().starts_with("atomic_load") => {
+            intrinsic_args!(fx, args => (ptr); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let ty = generic_args.type_at(0);
+            match ty.kind() {
+                ty::Uint(UintTy::U128) | ty::Int(IntTy::I128) => {
+                    // FIXME implement 128bit atomics
+                    if fx.tcx.is_compiler_builtins(LOCAL_CRATE) {
+                        // special case for compiler-builtins to avoid having to patch it
+                        crate::trap::trap_unimplemented(fx, "128bit atomics not yet supported");
+                        return Ok(());
+                    } else {
+                        fx.tcx
+                            .dcx()
+                            .span_fatal(source_info.span, "128bit atomics not yet supported");
+                    }
+                }
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, ty);
+                    return Ok(());
+                }
+            }
+            let clif_ty = fx.clif_type(ty).unwrap();
+
+            let val = fx.bcx.ins().atomic_load(clif_ty, MemFlags::trusted(), ptr);
+
+            let val = CValue::by_val(val, fx.layout_of(ty));
+            ret.write_cvalue(fx, val);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_store") => {
+            intrinsic_args!(fx, args => (ptr, val); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let ty = generic_args.type_at(0);
+            match ty.kind() {
+                ty::Uint(UintTy::U128) | ty::Int(IntTy::I128) => {
+                    // FIXME implement 128bit atomics
+                    if fx.tcx.is_compiler_builtins(LOCAL_CRATE) {
+                        // special case for compiler-builtins to avoid having to patch it
+                        crate::trap::trap_unimplemented(fx, "128bit atomics not yet supported");
+                        return Ok(());
+                    } else {
+                        fx.tcx
+                            .dcx()
+                            .span_fatal(source_info.span, "128bit atomics not yet supported");
+                    }
+                }
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, ty);
+                    return Ok(());
+                }
+            }
+
+            let val = val.load_scalar(fx);
+
+            fx.bcx.ins().atomic_store(MemFlags::trusted(), val, ptr);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_xchg") => {
+            intrinsic_args!(fx, args => (ptr, new); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = new.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let new = new.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Xchg, ptr, new);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_cxchg") => {
+            // both atomic_cxchg_* and atomic_cxchgweak_*
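+            // Cranelift's `atomic_cas` is always strong; a strong CAS is also a
+            // valid implementation of the weak variant, which merely permits
+            // spurious failure.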
+            intrinsic_args!(fx, args => (ptr, test_old, new); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = new.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+
+            let test_old = test_old.load_scalar(fx);
+            let new = new.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_cas(MemFlags::trusted(), ptr, test_old, new);
+            let is_eq = fx.bcx.ins().icmp(IntCC::Equal, old, test_old);
+
+            let ret_val = CValue::by_val_pair(old, is_eq, ret.layout());
+            ret.write_cvalue(fx, ret_val);
+        }
+
+        _ if intrinsic.as_str().starts_with("atomic_xadd") => {
+            intrinsic_args!(fx, args => (ptr, amount); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = amount.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let amount = amount.load_scalar(fx);
+
+            let old =
+                fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Add, ptr, amount);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_xsub") => {
+            intrinsic_args!(fx, args => (ptr, amount); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = amount.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let amount = amount.load_scalar(fx);
+
+            let old =
+                fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Sub, ptr, amount);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_and") => {
+            intrinsic_args!(fx, args => (ptr, src); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = src.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let src = src.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::And, ptr, src);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_or") => {
+            intrinsic_args!(fx, args => (ptr, src); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = src.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let src = src.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Or, ptr, src);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_xor") => {
+            intrinsic_args!(fx, args => (ptr, src); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = src.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let src = src.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Xor, ptr, src);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_nand") => {
+            intrinsic_args!(fx, args => (ptr, src); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = src.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let src = src.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Nand, ptr, src);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_max") => {
+            intrinsic_args!(fx, args => (ptr, src); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = src.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let src = src.load_scalar(fx);
+
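+            // `atomic_max` is the signed maximum, hence `Smax`; the unsigned
+            // variant `atomic_umax` below uses `Umax`.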
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Smax, ptr, src);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_umax") => {
+            intrinsic_args!(fx, args => (ptr, src); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = src.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let src = src.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Umax, ptr, src);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_min") => {
+            intrinsic_args!(fx, args => (ptr, src); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = src.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let src = src.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Smin, ptr, src);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+        _ if intrinsic.as_str().starts_with("atomic_umin") => {
+            intrinsic_args!(fx, args => (ptr, src); intrinsic);
+            let ptr = ptr.load_scalar(fx);
+
+            let layout = src.layout();
+            match layout.ty.kind() {
+                ty::Uint(_) | ty::Int(_) | ty::RawPtr(..) => {}
+                _ => {
+                    report_atomic_type_validation_error(fx, intrinsic, source_info.span, layout.ty);
+                    return Ok(());
+                }
+            }
+            let ty = fx.clif_type(layout.ty).unwrap();
+
+            let src = src.load_scalar(fx);
+
+            let old = fx.bcx.ins().atomic_rmw(ty, MemFlags::trusted(), AtomicRmwOp::Umin, ptr, src);
+
+            let old = CValue::by_val(old, layout);
+            ret.write_cvalue(fx, old);
+        }
+
+        sym::minnumf32 => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            let val = crate::num::codegen_float_min(fx, a, b);
+            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
+            ret.write_cvalue(fx, val);
+        }
+        sym::minnumf64 => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            let val = crate::num::codegen_float_min(fx, a, b);
+            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
+            ret.write_cvalue(fx, val);
+        }
+        sym::maxnumf32 => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            let val = crate::num::codegen_float_max(fx, a, b);
+            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
+            ret.write_cvalue(fx, val);
+        }
+        sym::maxnumf64 => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            let val = crate::num::codegen_float_max(fx, a, b);
+            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
+            ret.write_cvalue(fx, val);
+        }
+
+        sym::catch_unwind => {
+            intrinsic_args!(fx, args => (f, data, catch_fn); intrinsic);
+            let f = f.load_scalar(fx);
+            let data = data.load_scalar(fx);
+            let _catch_fn = catch_fn.load_scalar(fx);
+
+            // FIXME once unwinding is supported, change this to actually catch panics
+            let f_sig = fx.bcx.func.import_signature(Signature {
+                call_conv: fx.target_config.default_call_conv,
+                params: vec![AbiParam::new(pointer_ty(fx.tcx))],
+                returns: vec![],
+            });
+
+            fx.bcx.ins().call_indirect(f_sig, f, &[data]);
+
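+            // Returning 0 signals that no panic occurred; until unwinding is
+            // implemented this is the only possible outcome.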
+            let layout = fx.layout_of(fx.tcx.types.i32);
+            let ret_val = CValue::by_val(fx.bcx.ins().iconst(types::I32, 0), layout);
+            ret.write_cvalue(fx, ret_val);
+        }
+
+        sym::fadd_fast
+        | sym::fsub_fast
+        | sym::fmul_fast
+        | sym::fdiv_fast
+        | sym::frem_fast
+        | sym::fadd_algebraic
+        | sym::fsub_algebraic
+        | sym::fmul_algebraic
+        | sym::fdiv_algebraic
+        | sym::frem_algebraic => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
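+            // The `_fast` and `_algebraic` variants carry optimization hints
+            // only; they are lowered here to the same plain float ops as the
+            // default operators.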
+            let res = crate::num::codegen_float_binop(
+                fx,
+                match intrinsic {
+                    sym::fadd_fast | sym::fadd_algebraic => BinOp::Add,
+                    sym::fsub_fast | sym::fsub_algebraic => BinOp::Sub,
+                    sym::fmul_fast | sym::fmul_algebraic => BinOp::Mul,
+                    sym::fdiv_fast | sym::fdiv_algebraic => BinOp::Div,
+                    sym::frem_fast | sym::frem_algebraic => BinOp::Rem,
+                    _ => unreachable!(),
+                },
+                x,
+                y,
+            );
+            ret.write_cvalue(fx, res);
+        }
+        sym::float_to_int_unchecked => {
+            intrinsic_args!(fx, args => (f); intrinsic);
+            let f = f.load_scalar(fx);
+
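+            // The source value is a float, so the `from_signed` flag (passed
+            // as `false`) has no effect on this cast.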
+            let res = crate::cast::clif_int_or_float_cast(
+                fx,
+                f,
+                false,
+                fx.clif_type(ret.layout().ty).unwrap(),
+                type_sign(ret.layout().ty),
+            );
+            ret.write_cvalue(fx, CValue::by_val(res, ret.layout()));
+        }
+
+        sym::raw_eq => {
+            intrinsic_args!(fx, args => (lhs_ref, rhs_ref); intrinsic);
+            let lhs_ref = lhs_ref.load_scalar(fx);
+            let rhs_ref = rhs_ref.load_scalar(fx);
+
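+            // Three cases: zero-sized types compare equal trivially, sizes that
+            // fit a single integer load are compared directly, and everything
+            // else goes through `memcmp`.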
+            let size = fx.layout_of(generic_args.type_at(0)).layout.size();
+            // FIXME add and use emit_small_memcmp
+            let is_eq_value = if size == Size::ZERO {
+                // No bytes means they're trivially equal
+                fx.bcx.ins().iconst(types::I8, 1)
+            } else if let Some(clty) = size.bits().try_into().ok().and_then(Type::int) {
+                // Can't use `trusted` for these loads; they could be unaligned.
+                let mut flags = MemFlags::new();
+                flags.set_notrap();
+                let lhs_val = fx.bcx.ins().load(clty, flags, lhs_ref, 0);
+                let rhs_val = fx.bcx.ins().load(clty, flags, rhs_ref, 0);
+                fx.bcx.ins().icmp(IntCC::Equal, lhs_val, rhs_val)
+            } else {
+                // Just call `memcmp` (like slices do in core) when the
+                // size is too large or it's not a power-of-two.
+                let signed_bytes = i64::try_from(size.bytes()).unwrap();
+                let bytes_val = fx.bcx.ins().iconst(fx.pointer_type, signed_bytes);
+                let params = vec![AbiParam::new(fx.pointer_type); 3];
+                let returns = vec![AbiParam::new(types::I32)];
+                let args = &[lhs_ref, rhs_ref, bytes_val];
+                let cmp = fx.lib_call("memcmp", params, returns, args)[0];
+                fx.bcx.ins().icmp_imm(IntCC::Equal, cmp, 0)
+            };
+            ret.write_cvalue(fx, CValue::by_val(is_eq_value, ret.layout()));
+        }
+
+        sym::compare_bytes => {
+            intrinsic_args!(fx, args => (lhs_ptr, rhs_ptr, bytes_val); intrinsic);
+            let lhs_ptr = lhs_ptr.load_scalar(fx);
+            let rhs_ptr = rhs_ptr.load_scalar(fx);
+            let bytes_val = bytes_val.load_scalar(fx);
+
+            let params = vec![AbiParam::new(fx.pointer_type); 3];
+            let returns = vec![AbiParam::new(types::I32)];
+            let args = &[lhs_ptr, rhs_ptr, bytes_val];
+            // Here we assume that the `memcmp` provided by the target is a NOP for size 0.
+            let cmp = fx.lib_call("memcmp", params, returns, args)[0];
+            ret.write_cvalue(fx, CValue::by_val(cmp, ret.layout()));
+        }
+
+        sym::black_box => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            // FIXME implement black_box semantics
+            ret.write_cvalue(fx, a);
+        }
+
+        // FIXME implement variadics in cranelift
+        sym::va_copy | sym::va_arg | sym::va_end => {
+            fx.tcx.dcx().span_fatal(
+                source_info.span,
+                "Defining variadic functions is not yet supported by Cranelift",
+            );
+        }
+
+        // Unimplemented intrinsics must have a fallback body. The fallback body is obtained
+        // by converting the `InstanceDef::Intrinsic` to an `InstanceDef::Item`.
+        _ => {
+            let intrinsic = fx.tcx.intrinsic(instance.def_id()).unwrap();
+            if intrinsic.must_be_overridden {
+                span_bug!(
+                    source_info.span,
+                    "intrinsic {} must be overridden by codegen_cranelift, but isn't",
+                    intrinsic.name,
+                );
+            }
+            return Err(Instance::new(instance.def_id(), instance.args));
+        }
+    }
+
+    let ret_block = fx.get_block(destination.unwrap());
+    fx.bcx.ins().jump(ret_block, &[]);
+    Ok(())
+}
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
new file mode 100644
index 00000000000..452b5988dab
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -0,0 +1,1162 @@
+//! Codegen `extern "platform-intrinsic"` intrinsics.
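+//!
+//! For example, `simd_add` on a 4-lane integer vector lowers to four scalar
+//! `iadd`s; see the `sym::simd_add` arm below.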
+
+use cranelift_codegen::ir::immediates::Offset32;
+use rustc_target::abi::Endian;
+
+use super::*;
+use crate::prelude::*;
+
+fn report_simd_type_validation_error(
+    fx: &mut FunctionCx<'_, '_, '_>,
+    intrinsic: Symbol,
+    span: Span,
+    ty: Ty<'_>,
+) {
+    fx.tcx.dcx().span_err(
+        span,
+        format!(
+            "invalid monomorphization of `{}` intrinsic: expected SIMD input type, \
+            found non-SIMD `{}`",
+            intrinsic, ty
+        ),
+    );
+    // Prevent verifier error
+    fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+}
+
+pub(super) fn codegen_simd_intrinsic_call<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    intrinsic: Symbol,
+    generic_args: GenericArgsRef<'tcx>,
+    args: &[Spanned<mir::Operand<'tcx>>],
+    ret: CPlace<'tcx>,
+    target: BasicBlock,
+    span: Span,
+) {
+    match intrinsic {
+        sym::simd_as | sym::simd_cast => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+
+            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, ret_lane_ty, lane| {
+                let ret_lane_clif_ty = fx.clif_type(ret_lane_ty).unwrap();
+
+                let from_signed = type_sign(lane_ty);
+                let to_signed = type_sign(ret_lane_ty);
+
+                clif_int_or_float_cast(fx, lane, from_signed, ret_lane_clif_ty, to_signed)
+            });
+        }
+
+        sym::simd_eq | sym::simd_ne | sym::simd_lt | sym::simd_le | sym::simd_gt | sym::simd_ge => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            if !x.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
+                return;
+            }
+
+            // FIXME use vector instructions when possible
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, res_lane_ty, x_lane, y_lane| {
+                let res_lane = match (lane_ty.kind(), intrinsic) {
+                    (ty::Uint(_), sym::simd_eq) => fx.bcx.ins().icmp(IntCC::Equal, x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_ne) => {
+                        fx.bcx.ins().icmp(IntCC::NotEqual, x_lane, y_lane)
+                    }
+                    (ty::Uint(_), sym::simd_lt) => {
+                        fx.bcx.ins().icmp(IntCC::UnsignedLessThan, x_lane, y_lane)
+                    }
+                    (ty::Uint(_), sym::simd_le) => {
+                        fx.bcx.ins().icmp(IntCC::UnsignedLessThanOrEqual, x_lane, y_lane)
+                    }
+                    (ty::Uint(_), sym::simd_gt) => {
+                        fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, x_lane, y_lane)
+                    }
+                    (ty::Uint(_), sym::simd_ge) => {
+                        fx.bcx.ins().icmp(IntCC::UnsignedGreaterThanOrEqual, x_lane, y_lane)
+                    }
+
+                    (ty::Int(_), sym::simd_eq) => fx.bcx.ins().icmp(IntCC::Equal, x_lane, y_lane),
+                    (ty::Int(_), sym::simd_ne) => {
+                        fx.bcx.ins().icmp(IntCC::NotEqual, x_lane, y_lane)
+                    }
+                    (ty::Int(_), sym::simd_lt) => {
+                        fx.bcx.ins().icmp(IntCC::SignedLessThan, x_lane, y_lane)
+                    }
+                    (ty::Int(_), sym::simd_le) => {
+                        fx.bcx.ins().icmp(IntCC::SignedLessThanOrEqual, x_lane, y_lane)
+                    }
+                    (ty::Int(_), sym::simd_gt) => {
+                        fx.bcx.ins().icmp(IntCC::SignedGreaterThan, x_lane, y_lane)
+                    }
+                    (ty::Int(_), sym::simd_ge) => {
+                        fx.bcx.ins().icmp(IntCC::SignedGreaterThanOrEqual, x_lane, y_lane)
+                    }
+
+                    (ty::Float(_), sym::simd_eq) => {
+                        fx.bcx.ins().fcmp(FloatCC::Equal, x_lane, y_lane)
+                    }
+                    (ty::Float(_), sym::simd_ne) => {
+                        fx.bcx.ins().fcmp(FloatCC::NotEqual, x_lane, y_lane)
+                    }
+                    (ty::Float(_), sym::simd_lt) => {
+                        fx.bcx.ins().fcmp(FloatCC::LessThan, x_lane, y_lane)
+                    }
+                    (ty::Float(_), sym::simd_le) => {
+                        fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, x_lane, y_lane)
+                    }
+                    (ty::Float(_), sym::simd_gt) => {
+                        fx.bcx.ins().fcmp(FloatCC::GreaterThan, x_lane, y_lane)
+                    }
+                    (ty::Float(_), sym::simd_ge) => {
+                        fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, x_lane, y_lane)
+                    }
+
+                    _ => unreachable!(),
+                };
+
+                bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane)
+            });
+        }
+
+        // simd_shuffle_generic<T, U, const I: &[u32]>(x: T, y: T) -> U
+        sym::simd_shuffle_generic => {
+            let [x, y] = args else {
+                bug!("wrong number of args for intrinsic {intrinsic}");
+            };
+            let x = codegen_operand(fx, &x.node);
+            let y = codegen_operand(fx, &y.node);
+
+            if !x.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
+                return;
+            }
+
+            let idx = generic_args[2]
+                .expect_const()
+                .eval(fx.tcx, ty::ParamEnv::reveal_all(), span)
+                .unwrap()
+                .unwrap_branch();
+
+            assert_eq!(x.layout(), y.layout());
+            let layout = x.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+
+            assert_eq!(lane_ty, ret_lane_ty);
+            assert_eq!(idx.len() as u64, ret_lane_count);
+
+            let total_len = lane_count * 2;
+
+            let indexes =
+                idx.iter().map(|idx| idx.unwrap_leaf().try_to_u32().unwrap()).collect::<Vec<u32>>();
+
+            for &idx in &indexes {
+                assert!(u64::from(idx) < total_len, "idx {} out of range 0..{}", idx, total_len);
+            }
+
+            for (out_idx, in_idx) in indexes.into_iter().enumerate() {
+                let in_lane = if u64::from(in_idx) < lane_count {
+                    x.value_lane(fx, in_idx.into())
+                } else {
+                    y.value_lane(fx, u64::from(in_idx) - lane_count)
+                };
+                let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
+                out_lane.write_cvalue(fx, in_lane);
+            }
+        }
+
+        // simd_shuffle<T, I, U>(x: T, y: T, idx: I) -> U
+        sym::simd_shuffle => {
+            let (x, y, idx) = match args {
+                [x, y, idx] => (x, y, idx),
+                _ => {
+                    bug!("wrong number of args for intrinsic {intrinsic}");
+                }
+            };
+            let x = codegen_operand(fx, &x.node);
+            let y = codegen_operand(fx, &y.node);
+
+            if !x.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
+                return;
+            }
+
+            // Make sure this is actually an array, since typeck only checks the length-suffixed
+            // version of this intrinsic.
+            let idx_ty = fx.monomorphize(idx.node.ty(fx.mir, fx.tcx));
+            let n: u16 = match idx_ty.kind() {
+                ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => len
+                    .try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
+                    .unwrap_or_else(|| {
+                        span_bug!(span, "could not evaluate shuffle index array length")
+                    })
+                    .try_into()
+                    .unwrap(),
+                _ => {
+                    fx.tcx.dcx().span_err(
+                        span,
+                        format!("simd_shuffle index must be an array of `u32`, got `{}`", idx_ty),
+                    );
+                    // Prevent verifier error
+                    fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+                    return;
+                }
+            };
+
+            assert_eq!(x.layout(), y.layout());
+            let layout = x.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+
+            assert_eq!(lane_ty, ret_lane_ty);
+            assert_eq!(u64::from(n), ret_lane_count);
+
+            let total_len = lane_count * 2;
+
+            let indexes = {
+                use rustc_middle::mir::interpret::*;
+                let idx_const = match &idx.node {
+                    Operand::Constant(const_) => crate::constant::eval_mir_constant(fx, const_).0,
+                    Operand::Copy(_) | Operand::Move(_) => unreachable!("{idx:?}"),
+                };
+
+                let idx_bytes = match idx_const {
+                    ConstValue::Indirect { alloc_id, offset } => {
+                        let alloc = fx.tcx.global_alloc(alloc_id).unwrap_memory();
+                        let size = Size::from_bytes(
+                            4 * ret_lane_count, /* size_of([u32; ret_lane_count]) */
+                        );
+                        alloc
+                            .inner()
+                            .get_bytes_strip_provenance(fx, alloc_range(offset, size))
+                            .unwrap()
+                    }
+                    _ => unreachable!("{:?}", idx_const),
+                };
+
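+                // Decode one u32 index per output lane from the constant's raw
+                // bytes, honoring the target's endianness.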
+                (0..ret_lane_count)
+                    .map(|i| {
+                        let i = usize::try_from(i).unwrap();
+                        let idx = rustc_middle::mir::interpret::read_target_uint(
+                            fx.tcx.data_layout.endian,
+                            &idx_bytes[4 * i..4 * i + 4],
+                        )
+                        .expect("read_target_uint");
+                        u16::try_from(idx).expect("try_from u32")
+                    })
+                    .collect::<Vec<u16>>()
+            };
+
+            for &idx in &indexes {
+                assert!(u64::from(idx) < total_len, "idx {} out of range 0..{}", idx, total_len);
+            }
+
+            for (out_idx, in_idx) in indexes.into_iter().enumerate() {
+                let in_lane = if u64::from(in_idx) < lane_count {
+                    x.value_lane(fx, in_idx.into())
+                } else {
+                    y.value_lane(fx, u64::from(in_idx) - lane_count)
+                };
+                let out_lane = ret.place_lane(fx, u64::try_from(out_idx).unwrap());
+                out_lane.write_cvalue(fx, in_lane);
+            }
+        }
+
+        sym::simd_insert => {
+            let (base, idx, val) = match args {
+                [base, idx, val] => (base, idx, val),
+                _ => {
+                    bug!("wrong number of args for intrinsic {intrinsic}");
+                }
+            };
+            let base = codegen_operand(fx, &base.node);
+            let val = codegen_operand(fx, &val.node);
+
+            // FIXME validate
+            let idx_const = if let Some(idx_const) =
+                crate::constant::mir_operand_get_const_val(fx, &idx.node)
+            {
+                idx_const
+            } else {
+                fx.tcx.dcx().span_fatal(span, "Index argument for `simd_insert` is not a constant");
+            };
+
+            let idx: u32 = idx_const
+                .try_to_u32()
+                .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
+            let (lane_count, _lane_ty) = base.layout().ty.simd_size_and_type(fx.tcx);
+            if u64::from(idx) >= lane_count {
+                fx.tcx.dcx().span_fatal(
+                    fx.mir.span,
+                    format!("[simd_insert] idx {} >= lane_count {}", idx, lane_count),
+                );
+            }
+
+            ret.write_cvalue(fx, base);
+            let ret_lane = ret.place_lane(fx, idx.into());
+            ret_lane.write_cvalue(fx, val);
+        }
+
+        sym::simd_extract => {
+            let (v, idx) = match args {
+                [v, idx] => (v, idx),
+                _ => {
+                    bug!("wrong number of args for intrinsic {intrinsic}");
+                }
+            };
+            let v = codegen_operand(fx, &v.node);
+
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            let idx_const = if let Some(idx_const) =
+                crate::constant::mir_operand_get_const_val(fx, &idx.node)
+            {
+                idx_const
+            } else {
+                fx.tcx.dcx().span_warn(span, "Index argument for `simd_extract` is not a constant");
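+                // Branch into a trapping block via a constant-true `brif`,
+                // presumably so the return block keeps a CFG predecessor.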
+                let trap_block = fx.bcx.create_block();
+                let true_ = fx.bcx.ins().iconst(types::I8, 1);
+                let ret_block = fx.get_block(target);
+                fx.bcx.ins().brif(true_, trap_block, &[], ret_block, &[]);
+                fx.bcx.switch_to_block(trap_block);
+                crate::trap::trap_unimplemented(
+                    fx,
+                    "Index argument for `simd_extract` is not a constant",
+                );
+                return;
+            };
+
+            let idx = idx_const
+                .try_to_u32()
+                .unwrap_or_else(|_| panic!("kind not scalar: {:?}", idx_const));
+            let (lane_count, _lane_ty) = v.layout().ty.simd_size_and_type(fx.tcx);
+            if u64::from(idx) >= lane_count {
+                fx.tcx.dcx().span_fatal(
+                    fx.mir.span,
+                    format!("[simd_extract] idx {} >= lane_count {}", idx, lane_count),
+                );
+            }
+
+            let ret_lane = v.value_lane(fx, idx.into());
+            ret.write_cvalue(fx, ret_lane);
+        }
+
+        sym::simd_neg
+        | sym::simd_bswap
+        | sym::simd_bitreverse
+        | sym::simd_ctlz
+        | sym::simd_cttz => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+
+            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| match (
+                lane_ty.kind(),
+                intrinsic,
+            ) {
+                (ty::Int(_), sym::simd_neg) => fx.bcx.ins().ineg(lane),
+                (ty::Float(_), sym::simd_neg) => fx.bcx.ins().fneg(lane),
+
+                (ty::Uint(ty::UintTy::U8) | ty::Int(ty::IntTy::I8), sym::simd_bswap) => lane,
+                (ty::Uint(_) | ty::Int(_), sym::simd_bswap) => fx.bcx.ins().bswap(lane),
+                (ty::Uint(_) | ty::Int(_), sym::simd_bitreverse) => fx.bcx.ins().bitrev(lane),
+                (ty::Uint(_) | ty::Int(_), sym::simd_ctlz) => fx.bcx.ins().clz(lane),
+                (ty::Uint(_) | ty::Int(_), sym::simd_cttz) => fx.bcx.ins().ctz(lane),
+
+                _ => unreachable!(),
+            });
+        }
+
+        sym::simd_add
+        | sym::simd_sub
+        | sym::simd_mul
+        | sym::simd_div
+        | sym::simd_rem
+        | sym::simd_shl
+        | sym::simd_shr
+        | sym::simd_and
+        | sym::simd_or
+        | sym::simd_xor => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            // FIXME use vector instructions when possible
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
+                match (lane_ty.kind(), intrinsic) {
+                    (ty::Uint(_), sym::simd_add) => fx.bcx.ins().iadd(x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_sub) => fx.bcx.ins().isub(x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_mul) => fx.bcx.ins().imul(x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_div) => fx.bcx.ins().udiv(x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_rem) => fx.bcx.ins().urem(x_lane, y_lane),
+
+                    (ty::Int(_), sym::simd_add) => fx.bcx.ins().iadd(x_lane, y_lane),
+                    (ty::Int(_), sym::simd_sub) => fx.bcx.ins().isub(x_lane, y_lane),
+                    (ty::Int(_), sym::simd_mul) => fx.bcx.ins().imul(x_lane, y_lane),
+                    (ty::Int(_), sym::simd_div) => fx.bcx.ins().sdiv(x_lane, y_lane),
+                    (ty::Int(_), sym::simd_rem) => fx.bcx.ins().srem(x_lane, y_lane),
+
+                    (ty::Float(_), sym::simd_add) => fx.bcx.ins().fadd(x_lane, y_lane),
+                    (ty::Float(_), sym::simd_sub) => fx.bcx.ins().fsub(x_lane, y_lane),
+                    (ty::Float(_), sym::simd_mul) => fx.bcx.ins().fmul(x_lane, y_lane),
+                    (ty::Float(_), sym::simd_div) => fx.bcx.ins().fdiv(x_lane, y_lane),
+                    (ty::Float(FloatTy::F32), sym::simd_rem) => fx.lib_call(
+                        "fmodf",
+                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
+                        vec![AbiParam::new(types::F32)],
+                        &[x_lane, y_lane],
+                    )[0],
+                    (ty::Float(FloatTy::F64), sym::simd_rem) => fx.lib_call(
+                        "fmod",
+                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
+                        vec![AbiParam::new(types::F64)],
+                        &[x_lane, y_lane],
+                    )[0],
+
+                    (ty::Uint(_), sym::simd_shl) => fx.bcx.ins().ishl(x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_shr) => fx.bcx.ins().ushr(x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_and) => fx.bcx.ins().band(x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_or) => fx.bcx.ins().bor(x_lane, y_lane),
+                    (ty::Uint(_), sym::simd_xor) => fx.bcx.ins().bxor(x_lane, y_lane),
+
+                    (ty::Int(_), sym::simd_shl) => fx.bcx.ins().ishl(x_lane, y_lane),
+                    (ty::Int(_), sym::simd_shr) => fx.bcx.ins().sshr(x_lane, y_lane),
+                    (ty::Int(_), sym::simd_and) => fx.bcx.ins().band(x_lane, y_lane),
+                    (ty::Int(_), sym::simd_or) => fx.bcx.ins().bor(x_lane, y_lane),
+                    (ty::Int(_), sym::simd_xor) => fx.bcx.ins().bxor(x_lane, y_lane),
+
+                    _ => unreachable!(),
+                }
+            });
+        }
+
+        sym::simd_fma => {
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), c.layout());
+            assert_eq!(a.layout(), ret.layout());
+
+            let layout = a.layout();
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let res_lane_layout = fx.layout_of(lane_ty);
+
+            for lane in 0..lane_count {
+                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+                let c_lane = c.value_lane(fx, lane).load_scalar(fx);
+
+                let res_lane = fx.bcx.ins().fma(a_lane, b_lane, c_lane);
+                let res_lane = CValue::by_val(res_lane, res_lane_layout);
+
+                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
+            }
+        }
+
+        sym::simd_fmin | sym::simd_fmax => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            if !x.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, x.layout().ty);
+                return;
+            }
+
+            // FIXME use vector instructions when possible
+            simd_pair_for_each_lane(fx, x, y, ret, &|fx, lane_ty, _ret_lane_ty, x_lane, y_lane| {
+                match lane_ty.kind() {
+                    ty::Float(_) => {}
+                    _ => unreachable!("{:?}", lane_ty),
+                }
+                match intrinsic {
+                    sym::simd_fmin => crate::num::codegen_float_min(fx, x_lane, y_lane),
+                    sym::simd_fmax => crate::num::codegen_float_max(fx, x_lane, y_lane),
+                    _ => unreachable!(),
+                }
+            });
+        }
+
+        sym::simd_fpow => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+
+            simd_pair_for_each_lane(fx, a, b, ret, &|fx, lane_ty, _ret_lane_ty, a_lane, b_lane| {
+                match lane_ty.kind() {
+                    ty::Float(FloatTy::F32) => fx.lib_call(
+                        "powf",
+                        vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
+                        vec![AbiParam::new(types::F32)],
+                        &[a_lane, b_lane],
+                    )[0],
+                    ty::Float(FloatTy::F64) => fx.lib_call(
+                        "pow",
+                        vec![AbiParam::new(types::F64), AbiParam::new(types::F64)],
+                        vec![AbiParam::new(types::F64)],
+                        &[a_lane, b_lane],
+                    )[0],
+                    _ => unreachable!("{:?}", lane_ty),
+                }
+            });
+        }
+
+        sym::simd_fpowi => {
+            intrinsic_args!(fx, args => (a, exp); intrinsic);
+            let exp = exp.load_scalar(fx);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+
+            simd_for_each_lane(
+                fx,
+                a,
+                ret,
+                &|fx, lane_ty, _ret_lane_ty, lane| match lane_ty.kind() {
+                    ty::Float(FloatTy::F32) => fx.lib_call(
+                        "__powisf2", // compiler-builtins
+                        vec![AbiParam::new(types::F32), AbiParam::new(types::I32)],
+                        vec![AbiParam::new(types::F32)],
+                        &[lane, exp],
+                    )[0],
+                    ty::Float(FloatTy::F64) => fx.lib_call(
+                        "__powidf2", // compiler-builtins
+                        vec![AbiParam::new(types::F64), AbiParam::new(types::I32)],
+                        vec![AbiParam::new(types::F64)],
+                        &[lane, exp],
+                    )[0],
+                    _ => unreachable!("{:?}", lane_ty),
+                },
+            );
+        }
+
+        sym::simd_fsin
+        | sym::simd_fcos
+        | sym::simd_fexp
+        | sym::simd_fexp2
+        | sym::simd_flog
+        | sym::simd_flog10
+        | sym::simd_flog2
+        | sym::simd_round => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+
+            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
+                let lane_ty = match lane_ty.kind() {
+                    ty::Float(FloatTy::F32) => types::F32,
+                    ty::Float(FloatTy::F64) => types::F64,
+                    _ => unreachable!("{:?}", lane_ty),
+                };
+                let name = match (intrinsic, lane_ty) {
+                    (sym::simd_fsin, types::F32) => "sinf",
+                    (sym::simd_fsin, types::F64) => "sin",
+                    (sym::simd_fcos, types::F32) => "cosf",
+                    (sym::simd_fcos, types::F64) => "cos",
+                    (sym::simd_fexp, types::F32) => "expf",
+                    (sym::simd_fexp, types::F64) => "exp",
+                    (sym::simd_fexp2, types::F32) => "exp2f",
+                    (sym::simd_fexp2, types::F64) => "exp2",
+                    (sym::simd_flog, types::F32) => "logf",
+                    (sym::simd_flog, types::F64) => "log",
+                    (sym::simd_flog10, types::F32) => "log10f",
+                    (sym::simd_flog10, types::F64) => "log10",
+                    (sym::simd_flog2, types::F32) => "log2f",
+                    (sym::simd_flog2, types::F64) => "log2",
+                    (sym::simd_round, types::F32) => "roundf",
+                    (sym::simd_round, types::F64) => "round",
+                    _ => unreachable!("{:?}", intrinsic),
+                };
+                fx.lib_call(
+                    name,
+                    vec![AbiParam::new(lane_ty)],
+                    vec![AbiParam::new(lane_ty)],
+                    &[lane],
+                )[0]
+            });
+        }
+
+        sym::simd_fabs | sym::simd_fsqrt | sym::simd_ceil | sym::simd_floor | sym::simd_trunc => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+
+            simd_for_each_lane(fx, a, ret, &|fx, lane_ty, _ret_lane_ty, lane| {
+                match lane_ty.kind() {
+                    ty::Float(_) => {}
+                    _ => unreachable!("{:?}", lane_ty),
+                }
+                match intrinsic {
+                    sym::simd_fabs => fx.bcx.ins().fabs(lane),
+                    sym::simd_fsqrt => fx.bcx.ins().sqrt(lane),
+                    sym::simd_ceil => fx.bcx.ins().ceil(lane),
+                    sym::simd_floor => fx.bcx.ins().floor(lane),
+                    sym::simd_trunc => fx.bcx.ins().trunc(lane),
+                    _ => unreachable!(),
+                }
+            });
+        }
+
+        sym::simd_reduce_add_ordered => {
+            intrinsic_args!(fx, args => (v, acc); intrinsic);
+            let acc = acc.load_scalar(fx);
+
+            // FIXME there must be no acc param for integer vectors
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
+                if lane_ty.is_floating_point() {
+                    fx.bcx.ins().fadd(a, b)
+                } else {
+                    fx.bcx.ins().iadd(a, b)
+                }
+            });
+        }
+
+        sym::simd_reduce_add_unordered => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            // FIXME there must be no acc param for integer vectors
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
+                if lane_ty.is_floating_point() {
+                    fx.bcx.ins().fadd(a, b)
+                } else {
+                    fx.bcx.ins().iadd(a, b)
+                }
+            });
+        }
+
+        sym::simd_reduce_mul_ordered => {
+            intrinsic_args!(fx, args => (v, acc); intrinsic);
+            let acc = acc.load_scalar(fx);
+
+            // FIXME there must be no acc param for integer vectors
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, Some(acc), ret, &|fx, lane_ty, a, b| {
+                if lane_ty.is_floating_point() {
+                    fx.bcx.ins().fmul(a, b)
+                } else {
+                    fx.bcx.ins().imul(a, b)
+                }
+            });
+        }
+
+        sym::simd_reduce_mul_unordered => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            // FIXME there must be no acc param for integer vectors
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, lane_ty, a, b| {
+                if lane_ty.is_floating_point() {
+                    fx.bcx.ins().fmul(a, b)
+                } else {
+                    fx.bcx.ins().imul(a, b)
+                }
+            });
+        }
+
+        sym::simd_reduce_all => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().band(a, b));
+        }
+
+        sym::simd_reduce_any => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce_bool(fx, v, ret, &|fx, a, b| fx.bcx.ins().bor(a, b));
+        }
+
+        sym::simd_reduce_and => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().band(a, b));
+        }
+
+        sym::simd_reduce_or => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bor(a, b));
+        }
+
+        sym::simd_reduce_xor => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
+        }
+
+        sym::simd_reduce_min => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
+                let lt = match ty.kind() {
+                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
+                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
+                    ty::Float(_) => return crate::num::codegen_float_min(fx, a, b),
+                    _ => unreachable!(),
+                };
+                fx.bcx.ins().select(lt, a, b)
+            });
+        }
+
+        sym::simd_reduce_max => {
+            intrinsic_args!(fx, args => (v); intrinsic);
+
+            if !v.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty);
+                return;
+            }
+
+            simd_reduce(fx, v, None, ret, &|fx, ty, a, b| {
+                let gt = match ty.kind() {
+                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
+                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),
+                    ty::Float(_) => return crate::num::codegen_float_max(fx, a, b),
+                    _ => unreachable!(),
+                };
+                fx.bcx.ins().select(gt, a, b)
+            });
+        }
+
+        sym::simd_select => {
+            intrinsic_args!(fx, args => (m, a, b); intrinsic);
+
+            if !m.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, m.layout().ty);
+                return;
+            }
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+            assert_eq!(a.layout(), b.layout());
+
+            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+            let lane_layout = fx.layout_of(lane_ty);
+
+            for lane in 0..lane_count {
+                let m_lane = m.value_lane(fx, lane).load_scalar(fx);
+                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+
+                let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
+                let res_lane =
+                    CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);
+
+                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
+            }
+        }
+
+        sym::simd_select_bitmask => {
+            intrinsic_args!(fx, args => (m, a, b); intrinsic);
+
+            if !a.layout().ty.is_simd() {
+                report_simd_type_validation_error(fx, intrinsic, span, a.layout().ty);
+                return;
+            }
+            assert_eq!(a.layout(), b.layout());
+
+            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+            let lane_layout = fx.layout_of(lane_ty);
+
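+            // The mask is widened to at least 8 bits (`u8`), then the bit count
+            // is rounded up to a whole number of bytes.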
+            let expected_int_bits = lane_count.max(8);
+            let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
+
+            let m = match m.layout().ty.kind() {
+                ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => m.load_scalar(fx),
+                ty::Array(elem, len)
+                    if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
+                        && len.try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
+                            == Some(expected_bytes) =>
+                {
+                    m.force_stack(fx).0.load(
+                        fx,
+                        Type::int(expected_int_bits as u16).unwrap(),
+                        MemFlags::trusted(),
+                    )
+                }
+                _ => {
+                    fx.tcx.dcx().span_fatal(
+                        span,
+                        format!(
+                            "invalid monomorphization of `simd_select_bitmask` intrinsic: \
+                            cannot accept `{}` as mask, expected `u{}` or `[u8; {}]`",
+                            m.layout().ty,
+                            expected_int_bits,
+                            expected_bytes
+                        ),
+                    );
+                }
+            };
+
+            for lane in 0..lane_count {
+                // The bit order of the mask depends on the byte endianness, LSB-first for
+                // little endian and MSB-first for big endian.
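+                // E.g. on little endian, bit 0 of the mask selects lane 0.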
+                let mask_lane = match fx.tcx.sess.target.endian {
+                    Endian::Big => lane_count - 1 - lane,
+                    Endian::Little => lane,
+                };
+                let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(mask_lane) as i64);
+                let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
+                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
+
+                let m_lane = fx.bcx.ins().icmp_imm(IntCC::Equal, m_lane, 0);
+                let res_lane =
+                    CValue::by_val(fx.bcx.ins().select(m_lane, b_lane, a_lane), lane_layout);
+
+                ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
+            }
+        }
+
+        sym::simd_bitmask => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+            let lane_clif_ty = fx.clif_type(lane_ty).unwrap();
+
+            // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a
+            // vector mask and returns the most significant bit (MSB) of each lane in the form
+            // of either:
+            // * an unsigned integer
+            // * an array of `u8`
+            // If the vector has fewer than 8 lanes, a u8 is returned with zeroed trailing bits.
+            //
+            // The bit order of the result depends on the byte endianness, LSB-first for little
+            // endian and MSB-first for big endian.
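+            // For example, on little endian a 4-lane mask with lanes 0 and 2 set
+            // produces `0b0101`.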
+            let expected_int_bits = lane_count.max(8);
+            let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64);
+
+            match lane_ty.kind() {
+                ty::Int(_) | ty::Uint(_) => {}
+                _ => {
+                    fx.tcx.dcx().span_fatal(
+                        span,
+                        format!(
+                            "invalid monomorphization of `simd_bitmask` intrinsic: \
+                            vector argument `{}`'s element type `{}`, expected integer element \
+                            type",
+                            a.layout().ty,
+                            lane_ty
+                        ),
+                    );
+                }
+            }
+
+            let res_type =
+                Type::int_with_byte_size(u16::try_from(expected_bytes).unwrap()).unwrap();
+            let mut res = type_zero_value(&mut fx.bcx, res_type);
+
+            let lanes = match fx.tcx.sess.target.endian {
+                Endian::Big => Box::new(0..lane_count) as Box<dyn Iterator<Item = u64>>,
+                Endian::Little => Box::new((0..lane_count).rev()) as Box<dyn Iterator<Item = u64>>,
+            };
+            for lane in lanes {
+                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
+
+                // extract sign bit of an int
+                let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_clif_ty.bits() - 1));
+
+                // shift sign bit into result
+                let a_lane_sign = clif_intcast(fx, a_lane_sign, res_type, false);
+                res = fx.bcx.ins().ishl_imm(res, 1);
+                res = fx.bcx.ins().bor(res, a_lane_sign);
+            }
+
+            match ret.layout().ty.kind() {
+                ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => {}
+                ty::Array(elem, len)
+                    if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
+                        && len.try_eval_target_usize(fx.tcx, ty::ParamEnv::reveal_all())
+                            == Some(expected_bytes) => {}
+                _ => {
+                    fx.tcx.dcx().span_fatal(
+                        span,
+                        format!(
+                            "invalid monomorphization of `simd_bitmask` intrinsic: \
+                            cannot return `{}`, expected `u{}` or `[u8; {}]`",
+                            ret.layout().ty,
+                            expected_int_bits,
+                            expected_bytes
+                        ),
+                    );
+                }
+            }
+
+            let res = CValue::by_val(res, ret.layout());
+            ret.write_cvalue(fx, res);
+        }
+
+        sym::simd_saturating_add | sym::simd_saturating_sub => {
+            intrinsic_args!(fx, args => (x, y); intrinsic);
+
+            let bin_op = match intrinsic {
+                sym::simd_saturating_add => BinOp::Add,
+                sym::simd_saturating_sub => BinOp::Sub,
+                _ => unreachable!(),
+            };
+
+            // FIXME use vector instructions when possible
+            simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
+                crate::num::codegen_saturating_int_binop(fx, bin_op, x_lane, y_lane)
+            });
+        }
+
+        sym::simd_expose_provenance | sym::simd_with_exposed_provenance | sym::simd_cast_ptr => {
+            intrinsic_args!(fx, args => (arg); intrinsic);
+            ret.write_cvalue_transmute(fx, arg);
+        }
+
+        sym::simd_arith_offset => {
+            intrinsic_args!(fx, args => (ptr, offset); intrinsic);
+
+            let (lane_count, ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
+            let pointee_ty = ptr_lane_ty.builtin_deref(true).unwrap();
+            let pointee_size = fx.layout_of(pointee_ty).size.bytes();
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+            assert_eq!(lane_count, ret_lane_count);
+
+            for lane_idx in 0..lane_count {
+                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
+                let offset_lane = offset.value_lane(fx, lane_idx).load_scalar(fx);
+
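+                // Scale the lane offset by the pointee size to get a byte
+                // offset, skipping the multiply for one-byte pointees.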
+                let ptr_diff = if pointee_size != 1 {
+                    fx.bcx.ins().imul_imm(offset_lane, pointee_size as i64)
+                } else {
+                    offset_lane
+                };
+                let res_lane = fx.bcx.ins().iadd(ptr_lane, ptr_diff);
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        sym::simd_masked_store => {
+            intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);
+
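+            // Lowered lane by lane: each store is guarded by a branch on the
+            // corresponding mask lane.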
+            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, mask_lane_count);
+            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
+            let ptr_val = ptr.load_scalar(fx);
+
+            for lane_idx in 0..val_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+
+                fx.bcx.ins().brif(mask_lane, if_enabled, &[], next, &[]);
+                fx.bcx.seal_block(if_enabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32;
+                fx.bcx.ins().store(MemFlags::trusted(), val_lane, ptr_val, Offset32::new(offset));
+                fx.bcx.ins().jump(next, &[]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+
+                fx.bcx.ins().nop();
+            }
+        }
+
+        sym::simd_gather => {
+            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+
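+            // Lowered to a per-lane conditional load: a branchy scalar fallback
+            // rather than a hardware gather.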
+            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, ptr_lane_count);
+            assert_eq!(val_lane_count, mask_lane_count);
+            assert_eq!(val_lane_count, ret_lane_count);
+
+            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+            for lane_idx in 0..ptr_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let if_disabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+                fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
+                fx.bcx.seal_block(if_enabled);
+                fx.bcx.seal_block(if_disabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), ptr_lane, 0);
+                fx.bcx.ins().jump(next, &[res]);
+
+                fx.bcx.switch_to_block(if_disabled);
+                fx.bcx.ins().jump(next, &[val_lane]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+
+                fx.bcx.ins().nop();
+
+                ret.place_lane(fx, lane_idx)
+                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+            }
+        }
+
+        sym::simd_masked_load => {
+            intrinsic_args!(fx, args => (mask, ptr, val); intrinsic);
+
+            let (val_lane_count, val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, mask_lane_count);
+            assert_eq!(val_lane_count, ret_lane_count);
+
+            let lane_clif_ty = fx.clif_type(val_lane_ty).unwrap();
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+            let ptr_val = ptr.load_scalar(fx);
+
+            for lane_idx in 0..ret_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let if_disabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+                let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
+
+                fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
+                fx.bcx.seal_block(if_enabled);
+                fx.bcx.seal_block(if_disabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                let offset = lane_idx as i32 * lane_clif_ty.bytes() as i32;
+                let res = fx.bcx.ins().load(
+                    lane_clif_ty,
+                    MemFlags::trusted(),
+                    ptr_val,
+                    Offset32::new(offset),
+                );
+                fx.bcx.ins().jump(next, &[res]);
+
+                fx.bcx.switch_to_block(if_disabled);
+                fx.bcx.ins().jump(next, &[val_lane]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+
+                fx.bcx.ins().nop();
+
+                ret.place_lane(fx, lane_idx)
+                    .write_cvalue(fx, CValue::by_val(res_lane, ret_lane_layout));
+            }
+        }
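The gather and masked-load lowerings use the same per-lane diamond, but join through a block parameter because the two arms produce different SSA values. A scalar sketch of a single lane (names and types illustrative):

```rust
/// Scalar model of one `simd_masked_load` lane: the `if`/`else` arms mirror
/// the `if_enabled`/`if_disabled` blocks, and the value of the whole `if`
/// expression plays the role of the `next` block's parameter `res_lane`.
unsafe fn masked_load_lane_model(mask_lane: i32, ptr: *const i32, fallback: i32) -> i32 {
    if mask_lane != 0 { unsafe { ptr.read() } } else { fallback }
}
```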
+
+        sym::simd_scatter => {
+            intrinsic_args!(fx, args => (val, ptr, mask); intrinsic);
+
+            let (val_lane_count, _val_lane_ty) = val.layout().ty.simd_size_and_type(fx.tcx);
+            let (ptr_lane_count, _ptr_lane_ty) = ptr.layout().ty.simd_size_and_type(fx.tcx);
+            let (mask_lane_count, _mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(val_lane_count, ptr_lane_count);
+            assert_eq!(val_lane_count, mask_lane_count);
+
+            for lane_idx in 0..ptr_lane_count {
+                let val_lane = val.value_lane(fx, lane_idx).load_scalar(fx);
+                let ptr_lane = ptr.value_lane(fx, lane_idx).load_scalar(fx);
+                let mask_lane = mask.value_lane(fx, lane_idx).load_scalar(fx);
+
+                let if_enabled = fx.bcx.create_block();
+                let next = fx.bcx.create_block();
+
+                fx.bcx.ins().brif(mask_lane, if_enabled, &[], next, &[]);
+                fx.bcx.seal_block(if_enabled);
+
+                fx.bcx.switch_to_block(if_enabled);
+                fx.bcx.ins().store(MemFlags::trusted(), val_lane, ptr_lane, 0);
+                fx.bcx.ins().jump(next, &[]);
+
+                fx.bcx.seal_block(next);
+                fx.bcx.switch_to_block(next);
+            }
+        }
+
+        _ => {
+            fx.tcx.dcx().span_err(span, format!("Unknown SIMD intrinsic {}", intrinsic));
+            // Prevent verifier error
+            fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
+            return;
+        }
+    }
+    let ret_block = fx.get_block(target);
+    fx.bcx.ins().jump(ret_block, &[]);
+}
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
new file mode 100644
index 00000000000..39bbad16b0c
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -0,0 +1,352 @@
+#![cfg_attr(doc, allow(internal_features))]
+#![cfg_attr(doc, feature(rustdoc_internals))]
+#![cfg_attr(doc, doc(rust_logo))]
+#![feature(rustc_private)]
+// Note: please avoid adding other feature gates where possible
+#![allow(rustc::diagnostic_outside_of_impl)]
+#![allow(rustc::untranslatable_diagnostic)]
+#![warn(rust_2018_idioms)]
+#![warn(unused_lifetimes)]
+#![warn(unreachable_pub)]
+
+extern crate jobserver;
+#[macro_use]
+extern crate rustc_middle;
+extern crate rustc_ast;
+extern crate rustc_codegen_ssa;
+extern crate rustc_data_structures;
+extern crate rustc_errors;
+extern crate rustc_fs_util;
+extern crate rustc_hir;
+extern crate rustc_incremental;
+extern crate rustc_index;
+extern crate rustc_metadata;
+extern crate rustc_monomorphize;
+extern crate rustc_session;
+extern crate rustc_span;
+extern crate rustc_target;
+
+// This prevents duplicating functions and statics that are already part of the host rustc process.
+#[allow(unused_extern_crates)]
+extern crate rustc_driver;
+
+use std::any::Any;
+use std::cell::{Cell, RefCell};
+use std::sync::Arc;
+
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::settings::{self, Configurable};
+use rustc_codegen_ssa::traits::CodegenBackend;
+use rustc_codegen_ssa::CodegenResults;
+use rustc_data_structures::profiling::SelfProfilerRef;
+use rustc_errors::ErrorGuaranteed;
+use rustc_metadata::EncodedMetadata;
+use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
+use rustc_session::config::OutputFilenames;
+use rustc_session::Session;
+use rustc_span::{sym, Symbol};
+
+pub use crate::config::*;
+use crate::prelude::*;
+
+mod abi;
+mod allocator;
+mod analyze;
+mod archive;
+mod base;
+mod cast;
+mod codegen_i128;
+mod common;
+mod compiler_builtins;
+mod concurrency_limiter;
+mod config;
+mod constant;
+mod debuginfo;
+mod discriminant;
+mod driver;
+mod global_asm;
+mod inline_asm;
+mod intrinsics;
+mod linkage;
+mod main_shim;
+mod num;
+mod optimize;
+mod pointer;
+mod pretty_clif;
+mod toolchain;
+mod trap;
+mod unsize;
+mod value_and_place;
+mod vtable;
+
+mod prelude {
+    pub(crate) use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
+    pub(crate) use cranelift_codegen::ir::function::Function;
+    pub(crate) use cranelift_codegen::ir::types;
+    pub(crate) use cranelift_codegen::ir::{
+        AbiParam, Block, FuncRef, Inst, InstBuilder, MemFlags, Signature, SourceLoc, StackSlot,
+        StackSlotData, StackSlotKind, TrapCode, Type, Value,
+    };
+    pub(crate) use cranelift_codegen::Context;
+    pub(crate) use cranelift_module::{self, DataDescription, FuncId, Linkage, Module};
+    pub(crate) use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
+    pub(crate) use rustc_hir::def_id::{DefId, LOCAL_CRATE};
+    pub(crate) use rustc_index::Idx;
+    pub(crate) use rustc_middle::mir::{self, *};
+    pub(crate) use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
+    pub(crate) use rustc_middle::ty::{
+        self, FloatTy, Instance, InstanceDef, IntTy, ParamEnv, Ty, TyCtxt, UintTy,
+    };
+    pub(crate) use rustc_span::Span;
+    pub(crate) use rustc_target::abi::{Abi, FieldIdx, Scalar, Size, VariantIdx, FIRST_VARIANT};
+
+    pub(crate) use crate::abi::*;
+    pub(crate) use crate::base::{codegen_operand, codegen_place};
+    pub(crate) use crate::cast::*;
+    pub(crate) use crate::common::*;
+    pub(crate) use crate::debuginfo::{DebugContext, UnwindContext};
+    pub(crate) use crate::pointer::Pointer;
+    pub(crate) use crate::value_and_place::{CPlace, CValue};
+}
+
+struct PrintOnPanic<F: Fn() -> String>(F);
+impl<F: Fn() -> String> Drop for PrintOnPanic<F> {
+    fn drop(&mut self) {
+        if ::std::thread::panicking() {
+            println!("{}", (self.0)());
+        }
+    }
+}
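A hedged usage sketch (the function and message below are invented): the guard is dropped on both paths but prints only while a panic is unwinding, so it can attach context to crashes:

```rust
fn do_work(name: &str) {
    let _guard = PrintOnPanic(|| format!("while processing {name}"));
    // ... work that may panic; on unwind the message above is printed ...
}
```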
+
+/// The codegen context holds any information shared between the codegen of individual functions
+/// inside a single codegen unit, with the exception of the Cranelift [`Module`](cranelift_module::Module).
+struct CodegenCx {
+    profiler: SelfProfilerRef,
+    output_filenames: Arc<OutputFilenames>,
+    should_write_ir: bool,
+    global_asm: String,
+    inline_asm_index: Cell<usize>,
+    debug_context: Option<DebugContext>,
+    unwind_context: UnwindContext,
+    cgu_name: Symbol,
+}
+
+impl CodegenCx {
+    fn new(
+        tcx: TyCtxt<'_>,
+        backend_config: BackendConfig,
+        isa: &dyn TargetIsa,
+        debug_info: bool,
+        cgu_name: Symbol,
+    ) -> Self {
+        assert_eq!(pointer_ty(tcx), isa.pointer_type());
+
+        let unwind_context =
+            UnwindContext::new(isa, matches!(backend_config.codegen_mode, CodegenMode::Aot));
+        let debug_context = if debug_info && !tcx.sess.target.options.is_like_windows {
+            Some(DebugContext::new(tcx, isa, cgu_name.as_str()))
+        } else {
+            None
+        };
+        CodegenCx {
+            profiler: tcx.prof.clone(),
+            output_filenames: tcx.output_filenames(()).clone(),
+            should_write_ir: crate::pretty_clif::should_write_ir(tcx),
+            global_asm: String::new(),
+            inline_asm_index: Cell::new(0),
+            debug_context,
+            unwind_context,
+            cgu_name,
+        }
+    }
+}
+
+pub struct CraneliftCodegenBackend {
+    pub config: RefCell<Option<BackendConfig>>,
+}
+
+impl CodegenBackend for CraneliftCodegenBackend {
+    fn locale_resource(&self) -> &'static str {
+        // FIXME(rust-lang/rust#100717) - cranelift codegen backend is not yet translated
+        ""
+    }
+
+    fn init(&self, sess: &Session) {
+        use rustc_session::config::Lto;
+        match sess.lto() {
+            Lto::No | Lto::ThinLocal => {}
+            Lto::Thin | Lto::Fat => {
+                sess.dcx().warn("LTO is not supported. You may get a linker error.")
+            }
+        }
+
+        let mut config = self.config.borrow_mut();
+        if config.is_none() {
+            let new_config = BackendConfig::from_opts(&sess.opts.cg.llvm_args)
+                .unwrap_or_else(|err| sess.dcx().fatal(err));
+            *config = Some(new_config);
+        }
+    }
+
+    fn target_features(&self, sess: &Session, _allow_unstable: bool) -> Vec<rustc_span::Symbol> {
+        // FIXME return the actually used target features. This is necessary for #[cfg(target_feature)].
+        if sess.target.arch == "x86_64" && sess.target.os != "none" {
+            // x86_64 mandates SSE2 support
+            vec![Symbol::intern("fxsr"), sym::sse, Symbol::intern("sse2")]
+        } else if sess.target.arch == "aarch64" && sess.target.os != "none" {
+            // AArch64 mandates Neon support
+            vec![sym::neon]
+        } else {
+            vec![]
+        }
+    }
+
+    fn print_version(&self) {
+        println!("Cranelift version: {}", cranelift_codegen::VERSION);
+    }
+
+    fn codegen_crate(
+        &self,
+        tcx: TyCtxt<'_>,
+        metadata: EncodedMetadata,
+        need_metadata_module: bool,
+    ) -> Box<dyn Any> {
+        tcx.dcx().abort_if_errors();
+        let config = self.config.borrow().clone().unwrap();
+        match config.codegen_mode {
+            CodegenMode::Aot => driver::aot::run_aot(tcx, config, metadata, need_metadata_module),
+            CodegenMode::Jit | CodegenMode::JitLazy => {
+                #[cfg(feature = "jit")]
+                driver::jit::run_jit(tcx, config);
+
+                #[cfg(not(feature = "jit"))]
+                tcx.dcx().fatal("jit support was disabled when compiling rustc_codegen_cranelift");
+            }
+        }
+    }
+
+    fn join_codegen(
+        &self,
+        ongoing_codegen: Box<dyn Any>,
+        sess: &Session,
+        outputs: &OutputFilenames,
+    ) -> (CodegenResults, FxIndexMap<WorkProductId, WorkProduct>) {
+        ongoing_codegen.downcast::<driver::aot::OngoingCodegen>().unwrap().join(
+            sess,
+            outputs,
+            self.config.borrow().as_ref().unwrap(),
+        )
+    }
+
+    fn link(
+        &self,
+        sess: &Session,
+        codegen_results: CodegenResults,
+        outputs: &OutputFilenames,
+    ) -> Result<(), ErrorGuaranteed> {
+        use rustc_codegen_ssa::back::link::link_binary;
+
+        link_binary(sess, &crate::archive::ArArchiveBuilderBuilder, &codegen_results, outputs)
+    }
+}
+
+fn target_triple(sess: &Session) -> target_lexicon::Triple {
+    match sess.target.llvm_target.parse() {
+        Ok(triple) => triple,
+        Err(err) => sess.dcx().fatal(format!("target not recognized: {}", err)),
+    }
+}
+
+fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Arc<dyn TargetIsa + 'static> {
+    use target_lexicon::BinaryFormat;
+
+    let target_triple = crate::target_triple(sess);
+
+    let mut flags_builder = settings::builder();
+    flags_builder.enable("is_pic").unwrap();
+    let enable_verifier = if backend_config.enable_verifier { "true" } else { "false" };
+    flags_builder.set("enable_verifier", enable_verifier).unwrap();
+    flags_builder.set("regalloc_checker", enable_verifier).unwrap();
+
+    let preserve_frame_pointer = sess.target.options.frame_pointer
+        != rustc_target::spec::FramePointer::MayOmit
+        || matches!(sess.opts.cg.force_frame_pointers, Some(true));
+    flags_builder
+        .set("preserve_frame_pointers", if preserve_frame_pointer { "true" } else { "false" })
+        .unwrap();
+
+    let tls_model = match target_triple.binary_format {
+        BinaryFormat::Elf => "elf_gd",
+        BinaryFormat::Macho => "macho",
+        BinaryFormat::Coff => "coff",
+        _ => "none",
+    };
+    flags_builder.set("tls_model", tls_model).unwrap();
+
+    flags_builder.set("enable_llvm_abi_extensions", "true").unwrap();
+
+    use rustc_session::config::OptLevel;
+    match sess.opts.optimize {
+        OptLevel::No => {
+            flags_builder.set("opt_level", "none").unwrap();
+        }
+        OptLevel::Less | OptLevel::Default => {}
+        OptLevel::Size | OptLevel::SizeMin | OptLevel::Aggressive => {
+            flags_builder.set("opt_level", "speed_and_size").unwrap();
+        }
+    }
+
+    if let target_lexicon::Architecture::Aarch64(_)
+    | target_lexicon::Architecture::Riscv64(_)
+    | target_lexicon::Architecture::X86_64 = target_triple.architecture
+    {
+        // Windows depends on stack probes to grow the committed part of the stack.
+        // On other platforms it helps prevent stack smashing.
+        flags_builder.enable("enable_probestack").unwrap();
+        flags_builder.set("probestack_strategy", "inline").unwrap();
+    } else {
+        // __cranelift_probestack is not provided and inline stack probes are only supported on
+        // AArch64, Riscv64 and x86_64.
+        flags_builder.set("enable_probestack", "false").unwrap();
+    }
+
+    let flags = settings::Flags::new(flags_builder);
+
+    let isa_builder = match sess.opts.cg.target_cpu.as_deref() {
+        Some("native") => cranelift_native::builder_with_options(true).unwrap(),
+        Some(value) => {
+            let mut builder =
+                cranelift_codegen::isa::lookup(target_triple.clone()).unwrap_or_else(|err| {
+                    sess.dcx().fatal(format!("can't compile for {}: {}", target_triple, err));
+                });
+            if builder.enable(value).is_err() {
+                sess.dcx()
+                    .fatal("the specified target cpu isn't currently supported by Cranelift.");
+            }
+            builder
+        }
+        None => {
+            let mut builder =
+                cranelift_codegen::isa::lookup(target_triple.clone()).unwrap_or_else(|err| {
+                    sess.dcx().fatal(format!("can't compile for {}: {}", target_triple, err));
+                });
+            if target_triple.architecture == target_lexicon::Architecture::X86_64 {
+                // Only set the target cpu on x86_64 as Cranelift is missing
+                // the target cpu list for most other targets.
+                builder.enable(sess.target.cpu.as_ref()).unwrap();
+            }
+            builder
+        }
+    };
+
+    match isa_builder.finish(flags) {
+        Ok(target_isa) => target_isa,
+        Err(err) => sess.dcx().fatal(format!("failed to build TargetIsa: {}", err)),
+    }
+}
+
+/// This is the entrypoint for a hot-plugged rustc_codegen_cranelift.
+#[no_mangle]
+pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
+    Box::new(CraneliftCodegenBackend { config: RefCell::new(None) })
+}
diff --git a/compiler/rustc_codegen_cranelift/src/linkage.rs b/compiler/rustc_codegen_cranelift/src/linkage.rs
new file mode 100644
index 00000000000..ca853aac158
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/linkage.rs
@@ -0,0 +1,36 @@
+use rustc_middle::mir::mono::{Linkage as RLinkage, MonoItem, Visibility};
+
+use crate::prelude::*;
+
+pub(crate) fn get_clif_linkage(
+    mono_item: MonoItem<'_>,
+    linkage: RLinkage,
+    visibility: Visibility,
+    is_compiler_builtins: bool,
+) -> Linkage {
+    match (linkage, visibility) {
+        (RLinkage::External, Visibility::Default) if is_compiler_builtins => Linkage::Hidden,
+        (RLinkage::External, Visibility::Default) => Linkage::Export,
+        (RLinkage::Internal, Visibility::Default) => Linkage::Local,
+        (RLinkage::External, Visibility::Hidden) => Linkage::Hidden,
+        (RLinkage::WeakAny, Visibility::Default) => Linkage::Preemptible,
+        _ => panic!("{:?} = {:?} {:?}", mono_item, linkage, visibility),
+    }
+}
+
+pub(crate) fn get_static_linkage(tcx: TyCtxt<'_>, def_id: DefId) -> Linkage {
+    let fn_attrs = tcx.codegen_fn_attrs(def_id);
+
+    if let Some(linkage) = fn_attrs.linkage {
+        match linkage {
+            RLinkage::External => Linkage::Export,
+            RLinkage::Internal => Linkage::Local,
+            RLinkage::ExternalWeak | RLinkage::WeakAny => Linkage::Preemptible,
+            _ => panic!("{:?}", linkage),
+        }
+    } else if tcx.is_reachable_non_generic(def_id) {
+        Linkage::Export
+    } else {
+        Linkage::Hidden
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/main_shim.rs b/compiler/rustc_codegen_cranelift/src/main_shim.rs
new file mode 100644
index 00000000000..f9a729618a5
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/main_shim.rs
@@ -0,0 +1,176 @@
+use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
+use rustc_hir::LangItem;
+use rustc_middle::ty::AssocKind;
+use rustc_middle::ty::GenericArg;
+use rustc_session::config::{sigpipe, EntryFnType};
+use rustc_span::symbol::Ident;
+
+use crate::prelude::*;
+
+/// Create the `main` function which will initialize the Rust runtime and call
+/// the user's main function.
+pub(crate) fn maybe_create_entry_wrapper(
+    tcx: TyCtxt<'_>,
+    module: &mut impl Module,
+    unwind_context: &mut UnwindContext,
+    is_jit: bool,
+    is_primary_cgu: bool,
+) {
+    let (main_def_id, (is_main_fn, sigpipe)) = match tcx.entry_fn(()) {
+        Some((def_id, entry_ty)) => (
+            def_id,
+            match entry_ty {
+                EntryFnType::Main { sigpipe } => (true, sigpipe),
+                EntryFnType::Start => (false, sigpipe::DEFAULT),
+            },
+        ),
+        None => return,
+    };
+
+    if main_def_id.is_local() {
+        let instance = Instance::mono(tcx, main_def_id).polymorphize(tcx);
+        if module.get_name(tcx.symbol_name(instance).name).is_none() {
+            return;
+        }
+    } else if !is_primary_cgu {
+        return;
+    }
+
+    create_entry_fn(tcx, module, unwind_context, main_def_id, is_jit, is_main_fn, sigpipe);
+
+    fn create_entry_fn(
+        tcx: TyCtxt<'_>,
+        m: &mut impl Module,
+        unwind_context: &mut UnwindContext,
+        rust_main_def_id: DefId,
+        ignore_lang_start_wrapper: bool,
+        is_main_fn: bool,
+        sigpipe: u8,
+    ) {
+        let main_ret_ty = tcx.fn_sig(rust_main_def_id).no_bound_vars().unwrap().output();
+        // Given that `main()` has no arguments, its return type cannot have
+        // late-bound regions, since late-bound regions must appear in the
+        // argument listing.
+        let main_ret_ty = tcx.normalize_erasing_regions(
+            ty::ParamEnv::reveal_all(),
+            main_ret_ty.no_bound_vars().unwrap(),
+        );
+
+        let cmain_sig = Signature {
+            params: vec![
+                AbiParam::new(m.target_config().pointer_type()),
+                AbiParam::new(m.target_config().pointer_type()),
+            ],
+            returns: vec![AbiParam::new(m.target_config().pointer_type() /*isize*/)],
+            call_conv: crate::conv_to_call_conv(
+                tcx.sess,
+                tcx.sess.target.options.entry_abi,
+                m.target_config().default_call_conv,
+            ),
+        };
+
+        let entry_name = tcx.sess.target.options.entry_name.as_ref();
+        let cmain_func_id = match m.declare_function(entry_name, Linkage::Export, &cmain_sig) {
+            Ok(func_id) => func_id,
+            Err(err) => {
+                tcx.dcx()
+                    .fatal(format!("entry symbol `{entry_name}` declared multiple times: {err}"));
+            }
+        };
+
+        let instance = Instance::mono(tcx, rust_main_def_id).polymorphize(tcx);
+
+        let main_name = tcx.symbol_name(instance).name;
+        let main_sig = get_function_sig(tcx, m.target_config().default_call_conv, instance);
+        let main_func_id = m.declare_function(main_name, Linkage::Import, &main_sig).unwrap();
+
+        let mut ctx = Context::new();
+        ctx.func.signature = cmain_sig;
+        {
+            let mut func_ctx = FunctionBuilderContext::new();
+            let mut bcx = FunctionBuilder::new(&mut ctx.func, &mut func_ctx);
+
+            let block = bcx.create_block();
+            bcx.switch_to_block(block);
+            let arg_argc = bcx.append_block_param(block, m.target_config().pointer_type());
+            let arg_argv = bcx.append_block_param(block, m.target_config().pointer_type());
+            let arg_sigpipe = bcx.ins().iconst(types::I8, sigpipe as i64);
+
+            let main_func_ref = m.declare_func_in_func(main_func_id, &mut bcx.func);
+
+            let result = if is_main_fn && ignore_lang_start_wrapper {
+                // Regular main fn, but ignore #[lang = "start"] as we are running in the JIT.
+                // FIXME set program arguments somehow
+                let call_inst = bcx.ins().call(main_func_ref, &[]);
+                let call_results = bcx.func.dfg.inst_results(call_inst).to_owned();
+
+                let termination_trait = tcx.require_lang_item(LangItem::Termination, None);
+                let report = tcx
+                    .associated_items(termination_trait)
+                    .find_by_name_and_kind(
+                        tcx,
+                        Ident::from_str("report"),
+                        AssocKind::Fn,
+                        termination_trait,
+                    )
+                    .unwrap();
+                let report = Instance::expect_resolve(
+                    tcx,
+                    ParamEnv::reveal_all(),
+                    report.def_id,
+                    tcx.mk_args(&[GenericArg::from(main_ret_ty)]),
+                )
+                .polymorphize(tcx);
+
+                let report_name = tcx.symbol_name(report).name;
+                let report_sig = get_function_sig(tcx, m.target_config().default_call_conv, report);
+                let report_func_id =
+                    m.declare_function(report_name, Linkage::Import, &report_sig).unwrap();
+                let report_func_ref = m.declare_func_in_func(report_func_id, &mut bcx.func);
+
+                // FIXME do proper abi handling instead of expecting the pass mode to be identical
+                // for returns and arguments.
+                let report_call_inst = bcx.ins().call(report_func_ref, &call_results);
+                let res = bcx.func.dfg.inst_results(report_call_inst)[0];
+                match m.target_config().pointer_type() {
+                    types::I32 => res,
+                    types::I64 => bcx.ins().sextend(types::I64, res),
+                    _ => unimplemented!("16bit systems are not yet supported"),
+                }
+            } else if is_main_fn {
+                let start_def_id = tcx.require_lang_item(LangItem::Start, None);
+                let start_instance = Instance::expect_resolve(
+                    tcx,
+                    ParamEnv::reveal_all(),
+                    start_def_id,
+                    tcx.mk_args(&[main_ret_ty.into()]),
+                )
+                .polymorphize(tcx);
+                let start_func_id = import_function(tcx, m, start_instance);
+
+                let main_val = bcx.ins().func_addr(m.target_config().pointer_type(), main_func_ref);
+
+                let func_ref = m.declare_func_in_func(start_func_id, &mut bcx.func);
+                let call_inst =
+                    bcx.ins().call(func_ref, &[main_val, arg_argc, arg_argv, arg_sigpipe]);
+                bcx.inst_results(call_inst)[0]
+            } else {
+                // using user-defined start fn
+                let call_inst = bcx.ins().call(main_func_ref, &[arg_argc, arg_argv]);
+                bcx.inst_results(call_inst)[0]
+            };
+
+            bcx.ins().return_(&[result]);
+            bcx.seal_all_blocks();
+            bcx.finalize();
+        }
+
+        if let Err(err) = m.define_function(cmain_func_id, &mut ctx) {
+            tcx.dcx().fatal(format!("entry symbol `{entry_name}` defined multiple times: {err}"));
+        }
+
+        unwind_context.add_function(cmain_func_id, &ctx, m.isa());
+    }
+}
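Roughly, the generated shim corresponds to the following Rust for the common non-JIT `fn main` case. `entry`, `lang_start`, and `rust_main` are stand-in names for illustration, not items from this crate:

```rust
// Stand-ins for the real items:
fn rust_main() {}
fn lang_start(main: fn(), _argc: isize, _argv: *const *const u8, _sigpipe: u8) -> isize {
    main();
    0
}

// Rough Rust equivalent of the clif ir built above:
extern "C" fn entry(argc: isize, argv: *const *const u8) -> isize {
    // In the real shim the sigpipe value comes from `EntryFnType::Main { sigpipe }`.
    lang_start(rust_main, argc, argv, 0)
}
```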
diff --git a/compiler/rustc_codegen_cranelift/src/num.rs b/compiler/rustc_codegen_cranelift/src/num.rs
new file mode 100644
index 00000000000..4d96a26ea4f
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/num.rs
@@ -0,0 +1,464 @@
+//! Various operations on integer and floating-point numbers
+
+use crate::prelude::*;
+
+pub(crate) fn bin_op_to_intcc(bin_op: BinOp, signed: bool) -> Option<IntCC> {
+    use BinOp::*;
+    use IntCC::*;
+    Some(match bin_op {
+        Eq => Equal,
+        Lt => {
+            if signed {
+                SignedLessThan
+            } else {
+                UnsignedLessThan
+            }
+        }
+        Le => {
+            if signed {
+                SignedLessThanOrEqual
+            } else {
+                UnsignedLessThanOrEqual
+            }
+        }
+        Ne => NotEqual,
+        Ge => {
+            if signed {
+                SignedGreaterThanOrEqual
+            } else {
+                UnsignedGreaterThanOrEqual
+            }
+        }
+        Gt => {
+            if signed {
+                SignedGreaterThan
+            } else {
+                UnsignedGreaterThan
+            }
+        }
+        _ => return None,
+    })
+}
+
+fn codegen_three_way_compare<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    signed: bool,
+    lhs: Value,
+    rhs: Value,
+) -> CValue<'tcx> {
+    // This emits `(lhs > rhs) - (lhs < rhs)`, which is cranelift's preferred form per
+    // <https://github.com/bytecodealliance/wasmtime/blob/8052bb9e3b792503b225f2a5b2ba3bc023bff462/cranelift/codegen/src/prelude_opt.isle#L41-L47>
+    let gt_cc = crate::num::bin_op_to_intcc(BinOp::Gt, signed).unwrap();
+    let lt_cc = crate::num::bin_op_to_intcc(BinOp::Lt, signed).unwrap();
+    let gt = fx.bcx.ins().icmp(gt_cc, lhs, rhs);
+    let lt = fx.bcx.ins().icmp(lt_cc, lhs, rhs);
+    let val = fx.bcx.ins().isub(gt, lt);
+    CValue::by_val(val, fx.layout_of(fx.tcx.ty_ordering_enum(Some(fx.mir.span))))
+}
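A scalar sketch of why this works: each comparison materializes as 0 or 1, and their difference lands exactly on the discriminants of `core::cmp::Ordering`:

```rust
fn three_way_compare(lhs: i64, rhs: i64) -> i8 {
    (lhs > rhs) as i8 - (lhs < rhs) as i8
}

fn main() {
    assert_eq!(three_way_compare(1, 2), -1); // Ordering::Less
    assert_eq!(three_way_compare(2, 2), 0); // Ordering::Equal
    assert_eq!(three_way_compare(3, 2), 1); // Ordering::Greater
}
```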
+
+fn codegen_compare_bin_op<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    signed: bool,
+    lhs: Value,
+    rhs: Value,
+) -> CValue<'tcx> {
+    if bin_op == BinOp::Cmp {
+        return codegen_three_way_compare(fx, signed, lhs, rhs);
+    }
+
+    let intcc = crate::num::bin_op_to_intcc(bin_op, signed).unwrap();
+    let val = fx.bcx.ins().icmp(intcc, lhs, rhs);
+    CValue::by_val(val, fx.layout_of(fx.tcx.types.bool))
+}
+
+pub(crate) fn codegen_binop<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    in_lhs: CValue<'tcx>,
+    in_rhs: CValue<'tcx>,
+) -> CValue<'tcx> {
+    match bin_op {
+        BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt | BinOp::Cmp => {
+            match in_lhs.layout().ty.kind() {
+                ty::Bool | ty::Uint(_) | ty::Int(_) | ty::Char => {
+                    let signed = type_sign(in_lhs.layout().ty);
+                    let lhs = in_lhs.load_scalar(fx);
+                    let rhs = in_rhs.load_scalar(fx);
+
+                    return codegen_compare_bin_op(fx, bin_op, signed, lhs, rhs);
+                }
+                _ => {}
+            }
+        }
+        _ => {}
+    }
+
+    match in_lhs.layout().ty.kind() {
+        ty::Bool => crate::num::codegen_bool_binop(fx, bin_op, in_lhs, in_rhs),
+        ty::Uint(_) | ty::Int(_) => crate::num::codegen_int_binop(fx, bin_op, in_lhs, in_rhs),
+        ty::Float(_) => crate::num::codegen_float_binop(fx, bin_op, in_lhs, in_rhs),
+        ty::RawPtr(..) | ty::FnPtr(..) => crate::num::codegen_ptr_binop(fx, bin_op, in_lhs, in_rhs),
+        _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs.layout().ty, in_rhs.layout().ty),
+    }
+}
+
+pub(crate) fn codegen_bool_binop<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    in_lhs: CValue<'tcx>,
+    in_rhs: CValue<'tcx>,
+) -> CValue<'tcx> {
+    let lhs = in_lhs.load_scalar(fx);
+    let rhs = in_rhs.load_scalar(fx);
+
+    let b = fx.bcx.ins();
+    let res = match bin_op {
+        BinOp::BitXor => b.bxor(lhs, rhs),
+        BinOp::BitAnd => b.band(lhs, rhs),
+        BinOp::BitOr => b.bor(lhs, rhs),
+        // Compare binops handled by `codegen_binop`.
+        _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs),
+    };
+
+    CValue::by_val(res, fx.layout_of(fx.tcx.types.bool))
+}
+
+pub(crate) fn codegen_int_binop<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    in_lhs: CValue<'tcx>,
+    in_rhs: CValue<'tcx>,
+) -> CValue<'tcx> {
+    if !matches!(bin_op, BinOp::Shl | BinOp::ShlUnchecked | BinOp::Shr | BinOp::ShrUnchecked) {
+        assert_eq!(
+            in_lhs.layout().ty,
+            in_rhs.layout().ty,
+            "int binop requires lhs and rhs of same type"
+        );
+    }
+
+    if let Some(res) = crate::codegen_i128::maybe_codegen(fx, bin_op, in_lhs, in_rhs) {
+        return res;
+    }
+
+    let signed = type_sign(in_lhs.layout().ty);
+
+    let lhs = in_lhs.load_scalar(fx);
+    let rhs = in_rhs.load_scalar(fx);
+
+    let b = fx.bcx.ins();
+    // FIXME trap on overflow for the Unchecked versions
+    let val = match bin_op {
+        BinOp::Add | BinOp::AddUnchecked => b.iadd(lhs, rhs),
+        BinOp::Sub | BinOp::SubUnchecked => b.isub(lhs, rhs),
+        BinOp::Mul | BinOp::MulUnchecked => b.imul(lhs, rhs),
+        BinOp::Div => {
+            if signed {
+                b.sdiv(lhs, rhs)
+            } else {
+                b.udiv(lhs, rhs)
+            }
+        }
+        BinOp::Rem => {
+            if signed {
+                b.srem(lhs, rhs)
+            } else {
+                b.urem(lhs, rhs)
+            }
+        }
+        BinOp::BitXor => b.bxor(lhs, rhs),
+        BinOp::BitAnd => b.band(lhs, rhs),
+        BinOp::BitOr => b.bor(lhs, rhs),
+        BinOp::Shl | BinOp::ShlUnchecked => b.ishl(lhs, rhs),
+        BinOp::Shr | BinOp::ShrUnchecked => {
+            if signed {
+                b.sshr(lhs, rhs)
+            } else {
+                b.ushr(lhs, rhs)
+            }
+        }
+        BinOp::Offset => unreachable!("Offset is not an integer operation"),
+        // Compare binops handled by `codegen_binop`.
+        BinOp::Eq | BinOp::Ne | BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge | BinOp::Cmp => {
+            unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs.layout().ty, in_rhs.layout().ty);
+        }
+    };
+
+    CValue::by_val(val, in_lhs.layout())
+}
+
+pub(crate) fn codegen_checked_int_binop<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    in_lhs: CValue<'tcx>,
+    in_rhs: CValue<'tcx>,
+) -> CValue<'tcx> {
+    let lhs = in_lhs.load_scalar(fx);
+    let rhs = in_rhs.load_scalar(fx);
+
+    if let Some(res) = crate::codegen_i128::maybe_codegen_checked(fx, bin_op, in_lhs, in_rhs) {
+        return res;
+    }
+
+    let signed = type_sign(in_lhs.layout().ty);
+
+    let (res, has_overflow) = match bin_op {
+        BinOp::Add => {
+            /*let (val, c_out) = fx.bcx.ins().iadd_cout(lhs, rhs);
+            (val, c_out)*/
+            // FIXME(CraneStation/cranelift#849) legalize iadd_cout for i8 and i16
+            let val = fx.bcx.ins().iadd(lhs, rhs);
+            let has_overflow = if !signed {
+                fx.bcx.ins().icmp(IntCC::UnsignedLessThan, val, lhs)
+            } else {
+                let rhs_is_negative = fx.bcx.ins().icmp_imm(IntCC::SignedLessThan, rhs, 0);
+                let slt = fx.bcx.ins().icmp(IntCC::SignedLessThan, val, lhs);
+                fx.bcx.ins().bxor(rhs_is_negative, slt)
+            };
+            (val, has_overflow)
+        }
+        BinOp::Sub => {
+            /*let (val, b_out) = fx.bcx.ins().isub_bout(lhs, rhs);
+            (val, b_out)*/
+            // FIXME(CraneStation/cranelift#849) legalize isub_bout for i8 and i16
+            let val = fx.bcx.ins().isub(lhs, rhs);
+            let has_overflow = if !signed {
+                fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, val, lhs)
+            } else {
+                let rhs_is_negative = fx.bcx.ins().icmp_imm(IntCC::SignedLessThan, rhs, 0);
+                let sgt = fx.bcx.ins().icmp(IntCC::SignedGreaterThan, val, lhs);
+                fx.bcx.ins().bxor(rhs_is_negative, sgt)
+            };
+            (val, has_overflow)
+        }
+        BinOp::Mul => {
+            let ty = fx.bcx.func.dfg.value_type(lhs);
+            match ty {
+                types::I8 | types::I16 | types::I32 if !signed => {
+                    let lhs = fx.bcx.ins().uextend(ty.double_width().unwrap(), lhs);
+                    let rhs = fx.bcx.ins().uextend(ty.double_width().unwrap(), rhs);
+                    let val = fx.bcx.ins().imul(lhs, rhs);
+                    let has_overflow = fx.bcx.ins().icmp_imm(
+                        IntCC::UnsignedGreaterThan,
+                        val,
+                        (1 << ty.bits()) - 1,
+                    );
+                    let val = fx.bcx.ins().ireduce(ty, val);
+                    (val, has_overflow)
+                }
+                types::I8 | types::I16 | types::I32 if signed => {
+                    let lhs = fx.bcx.ins().sextend(ty.double_width().unwrap(), lhs);
+                    let rhs = fx.bcx.ins().sextend(ty.double_width().unwrap(), rhs);
+                    let val = fx.bcx.ins().imul(lhs, rhs);
+                    let has_underflow =
+                        fx.bcx.ins().icmp_imm(IntCC::SignedLessThan, val, -(1 << (ty.bits() - 1)));
+                    let has_overflow = fx.bcx.ins().icmp_imm(
+                        IntCC::SignedGreaterThan,
+                        val,
+                        (1 << (ty.bits() - 1)) - 1,
+                    );
+                    let val = fx.bcx.ins().ireduce(ty, val);
+                    (val, fx.bcx.ins().bor(has_underflow, has_overflow))
+                }
+                types::I64 => {
+                    let val = fx.bcx.ins().imul(lhs, rhs);
+                    let has_overflow = if !signed {
+                        let val_hi = fx.bcx.ins().umulhi(lhs, rhs);
+                        fx.bcx.ins().icmp_imm(IntCC::NotEqual, val_hi, 0)
+                    } else {
+                        // Based on LLVM's instruction sequence for compiling
+                        // a.checked_mul(b).is_some() to riscv64gc:
+                        // mulh    a2, a0, a1
+                        // mul     a0, a0, a1
+                        // srai    a0, a0, 63
+                        // xor     a0, a0, a2
+                        // snez    a0, a0
+                        let val_hi = fx.bcx.ins().smulhi(lhs, rhs);
+                        let val_sign = fx.bcx.ins().sshr_imm(val, i64::from(ty.bits() - 1));
+                        let xor = fx.bcx.ins().bxor(val_hi, val_sign);
+                        fx.bcx.ins().icmp_imm(IntCC::NotEqual, xor, 0)
+                    };
+                    (val, has_overflow)
+                }
+                types::I128 => {
+                    unreachable!("i128 should have been handled by codegen_i128::maybe_codegen")
+                }
+                _ => unreachable!("invalid non-integer type {}", ty),
+            }
+        }
+        _ => bug!("binop {:?} on checked int/uint lhs: {:?} rhs: {:?}", bin_op, in_lhs, in_rhs),
+    };
+
+    let out_layout = fx.layout_of(Ty::new_tup(fx.tcx, &[in_lhs.layout().ty, fx.tcx.types.bool]));
+    CValue::by_val_pair(res, has_overflow, out_layout)
+}
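A scalar sketch of the widen-multiply-compare-reduce sequence used above for the narrow unsigned types, shown for `u32` widened to `u64`:

```rust
fn checked_mul_u32_model(lhs: u32, rhs: u32) -> (u32, bool) {
    let wide = u64::from(lhs) * u64::from(rhs); // uextend + imul
    let has_overflow = wide > u64::from(u32::MAX); // icmp_imm ugt, (1 << 32) - 1
    (wide as u32, has_overflow) // ireduce
}
```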
+
+pub(crate) fn codegen_saturating_int_binop<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    lhs: CValue<'tcx>,
+    rhs: CValue<'tcx>,
+) -> CValue<'tcx> {
+    assert_eq!(lhs.layout().ty, rhs.layout().ty);
+
+    let signed = type_sign(lhs.layout().ty);
+    let clif_ty = fx.clif_type(lhs.layout().ty).unwrap();
+    let (min, max) = type_min_max_value(&mut fx.bcx, clif_ty, signed);
+
+    let checked_res = crate::num::codegen_checked_int_binop(fx, bin_op, lhs, rhs);
+    let (val, has_overflow) = checked_res.load_scalar_pair(fx);
+
+    let val = match (bin_op, signed) {
+        (BinOp::Add, false) => fx.bcx.ins().select(has_overflow, max, val),
+        (BinOp::Sub, false) => fx.bcx.ins().select(has_overflow, min, val),
+        (BinOp::Add, true) => {
+            let rhs = rhs.load_scalar(fx);
+            let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0);
+            let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
+            fx.bcx.ins().select(has_overflow, sat_val, val)
+        }
+        (BinOp::Sub, true) => {
+            let rhs = rhs.load_scalar(fx);
+            let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, rhs, 0);
+            let sat_val = fx.bcx.ins().select(rhs_ge_zero, min, max);
+            fx.bcx.ins().select(has_overflow, sat_val, val)
+        }
+        _ => unreachable!(),
+    };
+
+    CValue::by_val(val, lhs.layout())
+}
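A scalar sketch of the two nested selects in the signed case (assuming `BinOp::Add` on `i32`): on overflow, the sign of `rhs` decides whether to clamp to the maximum or the minimum:

```rust
fn saturating_add_i32_model(lhs: i32, rhs: i32) -> i32 {
    let (val, has_overflow) = lhs.overflowing_add(rhs);
    let sat_val = if rhs >= 0 { i32::MAX } else { i32::MIN }; // select(rhs_ge_zero, max, min)
    if has_overflow { sat_val } else { val } // select(has_overflow, sat_val, val)
}
```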
+
+pub(crate) fn codegen_float_binop<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    in_lhs: CValue<'tcx>,
+    in_rhs: CValue<'tcx>,
+) -> CValue<'tcx> {
+    assert_eq!(in_lhs.layout().ty, in_rhs.layout().ty);
+
+    let lhs = in_lhs.load_scalar(fx);
+    let rhs = in_rhs.load_scalar(fx);
+
+    let b = fx.bcx.ins();
+    let res = match bin_op {
+        BinOp::Add => b.fadd(lhs, rhs),
+        BinOp::Sub => b.fsub(lhs, rhs),
+        BinOp::Mul => b.fmul(lhs, rhs),
+        BinOp::Div => b.fdiv(lhs, rhs),
+        BinOp::Rem => {
+            let (name, ty) = match in_lhs.layout().ty.kind() {
+                ty::Float(FloatTy::F32) => ("fmodf", types::F32),
+                ty::Float(FloatTy::F64) => ("fmod", types::F64),
+                _ => bug!(),
+            };
+
+            let ret_val = fx.lib_call(
+                name,
+                vec![AbiParam::new(ty), AbiParam::new(ty)],
+                vec![AbiParam::new(ty)],
+                &[lhs, rhs],
+            )[0];
+
+            return CValue::by_val(ret_val, in_lhs.layout());
+        }
+        BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => {
+            let fltcc = match bin_op {
+                BinOp::Eq => FloatCC::Equal,
+                BinOp::Lt => FloatCC::LessThan,
+                BinOp::Le => FloatCC::LessThanOrEqual,
+                BinOp::Ne => FloatCC::NotEqual,
+                BinOp::Ge => FloatCC::GreaterThanOrEqual,
+                BinOp::Gt => FloatCC::GreaterThan,
+                _ => unreachable!(),
+            };
+            let val = fx.bcx.ins().fcmp(fltcc, lhs, rhs);
+            return CValue::by_val(val, fx.layout_of(fx.tcx.types.bool));
+        }
+        _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs),
+    };
+
+    CValue::by_val(res, in_lhs.layout())
+}
+
+pub(crate) fn codegen_ptr_binop<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    bin_op: BinOp,
+    in_lhs: CValue<'tcx>,
+    in_rhs: CValue<'tcx>,
+) -> CValue<'tcx> {
+    let is_thin_ptr =
+        in_lhs.layout().ty.builtin_deref(true).map(|ty| !has_ptr_meta(fx.tcx, ty)).unwrap_or(true);
+
+    if is_thin_ptr {
+        match bin_op {
+            BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => {
+                let lhs = in_lhs.load_scalar(fx);
+                let rhs = in_rhs.load_scalar(fx);
+
+                codegen_compare_bin_op(fx, bin_op, false, lhs, rhs)
+            }
+            BinOp::Offset => {
+                let pointee_ty = in_lhs.layout().ty.builtin_deref(true).unwrap();
+                let (base, offset) = (in_lhs, in_rhs.load_scalar(fx));
+                let pointee_size = fx.layout_of(pointee_ty).size.bytes();
+                let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64);
+                let base_val = base.load_scalar(fx);
+                let res = fx.bcx.ins().iadd(base_val, ptr_diff);
+                CValue::by_val(res, base.layout())
+            }
+            _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs),
+        }
+    } else {
+        let (lhs_ptr, lhs_extra) = in_lhs.load_scalar_pair(fx);
+        let (rhs_ptr, rhs_extra) = in_rhs.load_scalar_pair(fx);
+
+        let res = match bin_op {
+            BinOp::Eq => {
+                let ptr_eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_ptr, rhs_ptr);
+                let extra_eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_extra, rhs_extra);
+                fx.bcx.ins().band(ptr_eq, extra_eq)
+            }
+            BinOp::Ne => {
+                let ptr_ne = fx.bcx.ins().icmp(IntCC::NotEqual, lhs_ptr, rhs_ptr);
+                let extra_ne = fx.bcx.ins().icmp(IntCC::NotEqual, lhs_extra, rhs_extra);
+                fx.bcx.ins().bor(ptr_ne, extra_ne)
+            }
+            BinOp::Lt | BinOp::Le | BinOp::Ge | BinOp::Gt => {
+                let ptr_eq = fx.bcx.ins().icmp(IntCC::Equal, lhs_ptr, rhs_ptr);
+
+                let ptr_cmp =
+                    fx.bcx.ins().icmp(bin_op_to_intcc(bin_op, false).unwrap(), lhs_ptr, rhs_ptr);
+                let extra_cmp = fx.bcx.ins().icmp(
+                    bin_op_to_intcc(bin_op, false).unwrap(),
+                    lhs_extra,
+                    rhs_extra,
+                );
+
+                fx.bcx.ins().select(ptr_eq, extra_cmp, ptr_cmp)
+            }
+            _ => panic!("bin_op {:?} on ptr", bin_op),
+        };
+
+        CValue::by_val(res, fx.layout_of(fx.tcx.types.bool))
+    }
+}
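A scalar sketch of the fat-pointer ordering: the data pointers are compared first, and the metadata breaks ties only when they are equal, which is exactly what `select(ptr_eq, extra_cmp, ptr_cmp)` computes:

```rust
fn fat_ptr_lt_model(lhs: (usize, usize), rhs: (usize, usize)) -> bool {
    if lhs.0 == rhs.0 { lhs.1 < rhs.1 } else { lhs.0 < rhs.0 }
}
```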
+
+// In Rust, floating point min and max don't propagate NaN; in Cranelift, however, they do.
+// For this reason it is necessary to use `a.is_nan() ? b : (a >= b ? b : a)` for `minnumf*`
+// and `a.is_nan() ? b : (a <= b ? b : a)` for `maxnumf*`. The NaN check is done by comparing
+// a float against itself: only NaN is not equal to itself.
+pub(crate) fn codegen_float_min(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value {
+    let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+    let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b);
+    let temp = fx.bcx.ins().select(a_ge_b, b, a);
+    fx.bcx.ins().select(a_is_nan, b, temp)
+}
+
+pub(crate) fn codegen_float_max(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value {
+    let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+    let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b);
+    let temp = fx.bcx.ins().select(a_le_b, b, a);
+    fx.bcx.ins().select(a_is_nan, b, temp)
+}
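A runnable check of the Rust-level semantics these selects reproduce:

```rust
fn main() {
    // Rust's min/max ignore a NaN input in favour of the other operand.
    assert_eq!(f32::min(f32::NAN, 1.0), 1.0);
    assert_eq!(f32::max(1.0, f32::NAN), 1.0);
    // Cranelift's native fmin/fmax would instead return NaN for both.
}
```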
diff --git a/compiler/rustc_codegen_cranelift/src/optimize/mod.rs b/compiler/rustc_codegen_cranelift/src/optimize/mod.rs
new file mode 100644
index 00000000000..0df7e82294b
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/optimize/mod.rs
@@ -0,0 +1,3 @@
+//! Various optimizations specific to cg_clif
+
+pub(crate) mod peephole;
diff --git a/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
new file mode 100644
index 00000000000..26327dca299
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
@@ -0,0 +1,47 @@
+//! Peephole optimizations that can be performed while creating clif ir.
+
+use cranelift_codegen::ir::{condcodes::IntCC, InstructionData, Opcode, Value, ValueDef};
+use cranelift_frontend::FunctionBuilder;
+
+/// If the given value was produced by the lowering of `Rvalue::Not`, return the input and `true`;
+/// otherwise return the given value and `false`.
+pub(crate) fn maybe_unwrap_bool_not(bcx: &mut FunctionBuilder<'_>, arg: Value) -> (Value, bool) {
+    if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
+        match bcx.func.dfg.insts[arg_inst] {
+            // This is the lowering of `Rvalue::Not`
+            InstructionData::IntCompareImm {
+                opcode: Opcode::IcmpImm,
+                cond: IntCC::Equal,
+                arg,
+                imm,
+            } if imm.bits() == 0 => (arg, true),
+            _ => (arg, false),
+        }
+    } else {
+        (arg, false)
+    }
+}
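A scalar sketch of why the rewrite this helper enables is sound: since `Rvalue::Not` on a bool is lowered as `val == 0`, a branch on the negation is just a branch on the original value with its targets swapped:

```rust
fn branch_model(val: u8) -> &'static str {
    let not_val = val == 0; // the `icmp_imm eq, 0` pattern matched above
    let direct = if not_val { "then" } else { "else" };
    let swapped = if val != 0 { "else" } else { "then" }; // targets swapped
    assert_eq!(direct, swapped);
    direct
}
```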
+
+/// Returns whether the branch is statically known to be taken, or `None` if this isn't statically known.
+pub(crate) fn maybe_known_branch_taken(
+    bcx: &FunctionBuilder<'_>,
+    arg: Value,
+    test_zero: bool,
+) -> Option<bool> {
+    let arg_inst = if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
+        arg_inst
+    } else {
+        return None;
+    };
+
+    match bcx.func.dfg.insts[arg_inst] {
+        InstructionData::UnaryImm { opcode: Opcode::Iconst, imm } => {
+            if test_zero {
+                Some(imm.bits() == 0)
+            } else {
+                Some(imm.bits() != 0)
+            }
+        }
+        _ => None,
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/pointer.rs b/compiler/rustc_codegen_cranelift/src/pointer.rs
new file mode 100644
index 00000000000..11ac6b94678
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/pointer.rs
@@ -0,0 +1,128 @@
+//! Defines [`Pointer`] which is used to improve the quality of the generated clif ir for pointer
+//! operations.
+
+use cranelift_codegen::ir::immediates::Offset32;
+use rustc_target::abi::Align;
+
+use crate::prelude::*;
+
+/// A pointer pointing either to a certain address, a certain stack slot or nothing.
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct Pointer {
+    base: PointerBase,
+    offset: Offset32,
+}
+
+#[derive(Copy, Clone, Debug)]
+pub(crate) enum PointerBase {
+    Addr(Value),
+    Stack(StackSlot),
+    Dangling(Align),
+}
+
+impl Pointer {
+    pub(crate) fn new(addr: Value) -> Self {
+        Pointer { base: PointerBase::Addr(addr), offset: Offset32::new(0) }
+    }
+
+    pub(crate) fn stack_slot(stack_slot: StackSlot) -> Self {
+        Pointer { base: PointerBase::Stack(stack_slot), offset: Offset32::new(0) }
+    }
+
+    pub(crate) fn dangling(align: Align) -> Self {
+        Pointer { base: PointerBase::Dangling(align), offset: Offset32::new(0) }
+    }
+
+    pub(crate) fn debug_base_and_offset(self) -> (PointerBase, Offset32) {
+        (self.base, self.offset)
+    }
+
+    pub(crate) fn get_addr(self, fx: &mut FunctionCx<'_, '_, '_>) -> Value {
+        match self.base {
+            PointerBase::Addr(base_addr) => {
+                let offset: i64 = self.offset.into();
+                if offset == 0 { base_addr } else { fx.bcx.ins().iadd_imm(base_addr, offset) }
+            }
+            PointerBase::Stack(stack_slot) => {
+                fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, self.offset)
+            }
+            PointerBase::Dangling(align) => {
+                fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(align.bytes()).unwrap())
+            }
+        }
+    }
+
+    pub(crate) fn offset(self, fx: &mut FunctionCx<'_, '_, '_>, extra_offset: Offset32) -> Self {
+        self.offset_i64(fx, extra_offset.into())
+    }
+
+    pub(crate) fn offset_i64(self, fx: &mut FunctionCx<'_, '_, '_>, extra_offset: i64) -> Self {
+        if let Some(new_offset) = self.offset.try_add_i64(extra_offset) {
+            Pointer { base: self.base, offset: new_offset }
+        } else {
+            let base_offset: i64 = self.offset.into();
+            if let Some(new_offset) = base_offset.checked_add(extra_offset) {
+                let base_addr = match self.base {
+                    PointerBase::Addr(addr) => addr,
+                    PointerBase::Stack(stack_slot) => {
+                        fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0)
+                    }
+                    PointerBase::Dangling(align) => {
+                        fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(align.bytes()).unwrap())
+                    }
+                };
+                let addr = fx.bcx.ins().iadd_imm(base_addr, new_offset);
+                Pointer { base: PointerBase::Addr(addr), offset: Offset32::new(0) }
+            } else {
+                panic!(
+                    "self.offset ({}) + extra_offset ({}) not representable in i64",
+                    base_offset, extra_offset
+                );
+            }
+        }
+    }
+
+    pub(crate) fn offset_value(self, fx: &mut FunctionCx<'_, '_, '_>, extra_offset: Value) -> Self {
+        match self.base {
+            PointerBase::Addr(addr) => Pointer {
+                base: PointerBase::Addr(fx.bcx.ins().iadd(addr, extra_offset)),
+                offset: self.offset,
+            },
+            PointerBase::Stack(stack_slot) => {
+                let base_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, self.offset);
+                Pointer {
+                    base: PointerBase::Addr(fx.bcx.ins().iadd(base_addr, extra_offset)),
+                    offset: Offset32::new(0),
+                }
+            }
+            PointerBase::Dangling(align) => {
+                let addr =
+                    fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(align.bytes()).unwrap());
+                Pointer {
+                    base: PointerBase::Addr(fx.bcx.ins().iadd(addr, extra_offset)),
+                    offset: self.offset,
+                }
+            }
+        }
+    }
+
+    pub(crate) fn load(self, fx: &mut FunctionCx<'_, '_, '_>, ty: Type, flags: MemFlags) -> Value {
+        match self.base {
+            PointerBase::Addr(base_addr) => fx.bcx.ins().load(ty, flags, base_addr, self.offset),
+            PointerBase::Stack(stack_slot) => fx.bcx.ins().stack_load(ty, stack_slot, self.offset),
+            PointerBase::Dangling(_align) => unreachable!(),
+        }
+    }
+
+    pub(crate) fn store(self, fx: &mut FunctionCx<'_, '_, '_>, value: Value, flags: MemFlags) {
+        match self.base {
+            PointerBase::Addr(base_addr) => {
+                fx.bcx.ins().store(flags, value, base_addr, self.offset);
+            }
+            PointerBase::Stack(stack_slot) => {
+                fx.bcx.ins().stack_store(value, stack_slot, self.offset);
+            }
+            PointerBase::Dangling(_align) => unreachable!(),
+        }
+    }
+}
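A simplified, hypothetical model of the offset folding (`ModelPointer` is invented for illustration): constant offsets accumulate for free, and at most one add is needed when the address is finally materialized:

```rust
#[derive(Copy, Clone)]
struct ModelPointer {
    base_addr: u64,
    offset: i64,
}

impl ModelPointer {
    fn offset_i64(self, extra: i64) -> Self {
        // Mirrors `Pointer::offset_i64`: fold the constant into the stored
        // offset instead of emitting an `iadd_imm` per call.
        ModelPointer { offset: self.offset.checked_add(extra).unwrap(), ..self }
    }

    fn get_addr(self) -> u64 {
        // The one place that would need an `iadd_imm`, and only if the
        // folded offset is non-zero.
        self.base_addr.wrapping_add(self.offset as u64)
    }
}
```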
diff --git a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
new file mode 100644
index 00000000000..196418023d9
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
@@ -0,0 +1,281 @@
+//! This module provides the [CommentWriter], which makes it possible to add
+//! comments to the written cranelift ir.
+//!
+//! # Example
+//!
+//! ```clif
+//! test compile
+//! target x86_64
+//!
+//! function u0:22(i64) -> i8, i8 system_v {
+//! ; symbol _ZN97_$LT$example..IsNotEmpty$u20$as$u20$mini_core..FnOnce$LT$$LP$$RF$$RF$$u5b$u16$u5d$$C$$RP$$GT$$GT$9call_once17hd517c453d67c0915E
+//! ; instance Instance { def: Item(WithOptConstParam { did: DefId(0:42 ~ example[4e51]::{impl#0}::call_once), const_param_did: None }), args: [ReErased, ReErased] }
+//! ; abi FnAbi { args: [ArgAbi { layout: TyAndLayout { ty: IsNotEmpty, layout: Layout { size: Size(0 bytes), align: AbiAndPrefAlign { abi: Align(1 bytes), pref: Align(8 bytes) }, abi: Aggregate { sized: true }, fields: Arbitrary { offsets: [], memory_index: [] }, largest_niche: None, variants: Single { index: 0 } } }, mode: Ignore }, ArgAbi { layout: TyAndLayout { ty: &&[u16], layout: Layout { size: Size(8 bytes), align: AbiAndPrefAlign { abi: Align(8 bytes), pref: Align(8 bytes) }, abi: Scalar(Initialized { value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), fields: Primitive, largest_niche: Some(Niche { offset: Size(0 bytes), value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), variants: Single { index: 0 } } }, mode: Direct(ArgAttributes { regular: NonNull | NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: Some(Align(8 bytes)) }) }], ret: ArgAbi { layout: TyAndLayout { ty: (u8, u8), layout: Layout { size: Size(2 bytes), align: AbiAndPrefAlign { abi: Align(1 bytes), pref: Align(8 bytes) }, abi: ScalarPair(Initialized { value: Int(I8, false), valid_range: 0..=255 }, Initialized { value: Int(I8, false), valid_range: 0..=255 }), fields: Arbitrary { offsets: [Size(0 bytes), Size(1 bytes)], memory_index: [0, 1] }, largest_niche: None, variants: Single { index: 0 } } }, mode: Pair(ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }, ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }) }, c_variadic: false, fixed_count: 1, conv: Rust, can_unwind: false }
+//!
+//! ; kind  loc.idx   param    pass mode                            ty
+//! ; ssa   _0    (u8, u8)                          2b 1, 8              var=(0, 1)
+//! ; ret   _0      -          Pair(ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }, ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }) (u8, u8)
+//! ; arg   _1      -          Ignore                               IsNotEmpty
+//! ; arg   _2.0    = v0       Direct(ArgAttributes { regular: NonNull | NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: Some(Align(8 bytes)) }) &&[u16]
+//!
+//! ; kind  local ty                              size align (abi,pref)
+//! ; zst   _1    IsNotEmpty                        0b 1, 8              align=8,offset=
+//! ; stack _2    (&&[u16],)                        8b 8, 8              storage=ss0
+//! ; ssa   _3    &mut IsNotEmpty                   8b 8, 8              var=2
+//!
+//!     ss0 = explicit_slot 16
+//!     sig0 = (i64, i64) -> i8, i8 system_v
+//!     fn0 = colocated u0:23 sig0 ; Instance { def: Item(WithOptConstParam { did: DefId(0:46 ~ example[4e51]::{impl#1}::call_mut), const_param_did: None }), args: [ReErased, ReErased] }
+//!
+//! block0(v0: i64):
+//!     nop
+//! ; write_cvalue: Addr(Pointer { base: Stack(ss0), offset: Offset32(0) }, None): &&[u16] <- ByVal(v0): &&[u16]
+//!     stack_store v0, ss0
+//!     jump block1
+//!
+//! block1:
+//!     nop
+//! ; _3 = &mut _1
+//!     v1 = iconst.i64 8
+//! ; write_cvalue: Var(_3, var2): &mut IsNotEmpty <- ByVal(v1): &mut IsNotEmpty
+//! ;
+//! ; _0 = <IsNotEmpty as mini_core::FnMut<(&&[u16],)>>::call_mut(move _3, _2)
+//!     v2 = stack_load.i64 ss0
+//!     v3, v4 = call fn0(v1, v2)  ; v1 = 8
+//!     v5 -> v3
+//!     v6 -> v4
+//! ; write_cvalue: VarPair(_0, var0, var1): (u8, u8) <- ByValPair(v3, v4): (u8, u8)
+//!     jump block2
+//!
+//! block2:
+//!     nop
+//! ;
+//! ; return
+//!     return v5, v6
+//! }
+//! ```
+
+use std::fmt;
+use std::io::Write;
+
+use cranelift_codegen::entity::SecondaryMap;
+use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::ir::Fact;
+use cranelift_codegen::write::{FuncWriter, PlainWriter};
+use rustc_middle::ty::layout::FnAbiOf;
+use rustc_middle::ty::print::with_no_trimmed_paths;
+use rustc_session::config::{OutputFilenames, OutputType};
+
+use crate::prelude::*;
+
+#[derive(Clone, Debug)]
+pub(crate) struct CommentWriter {
+    enabled: bool,
+    global_comments: Vec<String>,
+    entity_comments: FxHashMap<AnyEntity, String>,
+}
+
+impl CommentWriter {
+    pub(crate) fn new<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Self {
+        let enabled = should_write_ir(tcx);
+        let global_comments = if enabled {
+            with_no_trimmed_paths!({
+                vec![
+                    format!("symbol {}", tcx.symbol_name(instance).name),
+                    format!("instance {:?}", instance),
+                    format!(
+                        "abi {:?}",
+                        RevealAllLayoutCx(tcx).fn_abi_of_instance(instance, ty::List::empty())
+                    ),
+                    String::new(),
+                ]
+            })
+        } else {
+            vec![]
+        };
+
+        CommentWriter { enabled, global_comments, entity_comments: FxHashMap::default() }
+    }
+}
+
+impl CommentWriter {
+    pub(crate) fn enabled(&self) -> bool {
+        self.enabled
+    }
+
+    pub(crate) fn add_global_comment<S: Into<String>>(&mut self, comment: S) {
+        debug_assert!(self.enabled);
+        self.global_comments.push(comment.into());
+    }
+
+    pub(crate) fn add_comment<S: Into<String> + AsRef<str>, E: Into<AnyEntity>>(
+        &mut self,
+        entity: E,
+        comment: S,
+    ) {
+        debug_assert!(self.enabled);
+
+        use std::collections::hash_map::Entry;
+        match self.entity_comments.entry(entity.into()) {
+            Entry::Occupied(mut occ) => {
+                occ.get_mut().push('\n');
+                occ.get_mut().push_str(comment.as_ref());
+            }
+            Entry::Vacant(vac) => {
+                vac.insert(comment.into());
+            }
+        }
+    }
+}
+
+impl FuncWriter for &'_ CommentWriter {
+    fn write_preamble(
+        &mut self,
+        w: &mut dyn fmt::Write,
+        func: &Function,
+    ) -> Result<bool, fmt::Error> {
+        for comment in &self.global_comments {
+            if !comment.is_empty() {
+                writeln!(w, "; {}", comment)?;
+            } else {
+                writeln!(w)?;
+            }
+        }
+        if !self.global_comments.is_empty() {
+            writeln!(w)?;
+        }
+
+        self.super_preamble(w, func)
+    }
+
+    fn write_entity_definition(
+        &mut self,
+        w: &mut dyn fmt::Write,
+        _func: &Function,
+        entity: AnyEntity,
+        value: &dyn fmt::Display,
+        maybe_fact: Option<&Fact>,
+    ) -> fmt::Result {
+        if let Some(fact) = maybe_fact {
+            write!(w, "    {} ! {} = {}", entity, fact, value)?;
+        } else {
+            write!(w, "    {} = {}", entity, value)?;
+        }
+
+        if let Some(comment) = self.entity_comments.get(&entity) {
+            writeln!(w, " ; {}", comment.replace('\n', "\n; "))
+        } else {
+            writeln!(w)
+        }
+    }
+
+    fn write_block_header(
+        &mut self,
+        w: &mut dyn fmt::Write,
+        func: &Function,
+        block: Block,
+        indent: usize,
+    ) -> fmt::Result {
+        PlainWriter.write_block_header(w, func, block, indent)
+    }
+
+    fn write_instruction(
+        &mut self,
+        w: &mut dyn fmt::Write,
+        func: &Function,
+        aliases: &SecondaryMap<Value, Vec<Value>>,
+        inst: Inst,
+        indent: usize,
+    ) -> fmt::Result {
+        PlainWriter.write_instruction(w, func, aliases, inst, indent)?;
+        if let Some(comment) = self.entity_comments.get(&inst.into()) {
+            writeln!(w, "; {}", comment.replace('\n', "\n; "))?;
+        }
+        Ok(())
+    }
+}
+
+impl FunctionCx<'_, '_, '_> {
+    pub(crate) fn add_global_comment<S: Into<String>>(&mut self, comment: S) {
+        self.clif_comments.add_global_comment(comment);
+    }
+
+    pub(crate) fn add_comment<S: Into<String> + AsRef<str>, E: Into<AnyEntity>>(
+        &mut self,
+        entity: E,
+        comment: S,
+    ) {
+        self.clif_comments.add_comment(entity, comment);
+    }
+}
+
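+/// Writing clif ir is enabled with `--emit llvm-ir`: cg_clif repurposes the
+/// LLVM IR output type to dump the clif ir of each compiled function instead.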
+pub(crate) fn should_write_ir(tcx: TyCtxt<'_>) -> bool {
+    tcx.sess.opts.output_types.contains_key(&OutputType::LlvmAssembly)
+}
+
+pub(crate) fn write_ir_file(
+    output_filenames: &OutputFilenames,
+    name: &str,
+    write: impl FnOnce(&mut dyn Write) -> std::io::Result<()>,
+) {
+    let clif_output_dir = output_filenames.with_extension("clif");
+
+    match std::fs::create_dir(&clif_output_dir) {
+        Ok(()) => {}
+        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {}
+        res @ Err(_) => res.unwrap(),
+    }
+
+    let clif_file_name = clif_output_dir.join(name);
+
+    let res = std::fs::File::create(clif_file_name).and_then(|mut file| write(&mut file));
+    if let Err(err) = res {
+        // Using early_warn as no Session is available here
+        let handler =
+            rustc_session::EarlyDiagCtxt::new(rustc_session::config::ErrorOutputType::default());
+        handler.early_warn(format!("error writing ir file: {}", err));
+    }
+}
+
+pub(crate) fn write_clif_file(
+    output_filenames: &OutputFilenames,
+    symbol_name: &str,
+    postfix: &str,
+    isa: &dyn cranelift_codegen::isa::TargetIsa,
+    func: &cranelift_codegen::ir::Function,
+    mut clif_comments: &CommentWriter,
+) {
+    // FIXME work around filename too long errors
+    write_ir_file(output_filenames, &format!("{}.{}.clif", symbol_name, postfix), |file| {
+        let mut clif = String::new();
+        cranelift_codegen::write::decorate_function(&mut clif_comments, &mut clif, func).unwrap();
+
+        for flag in isa.flags().iter() {
+            writeln!(file, "set {}", flag)?;
+        }
+        write!(file, "target {}", isa.triple().architecture)?;
+        for isa_flag in isa.isa_flags().iter() {
+            write!(file, " {}", isa_flag)?;
+        }
+        writeln!(file, "\n")?;
+        writeln!(file)?;
+        file.write_all(clif.as_bytes())?;
+        Ok(())
+    });
+}
+
+impl fmt::Debug for FunctionCx<'_, '_, '_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        writeln!(f, "{:?}", self.instance.args)?;
+        writeln!(f, "{:?}", self.local_map)?;
+
+        let mut clif = String::new();
+        ::cranelift_codegen::write::decorate_function(
+            &mut &self.clif_comments,
+            &mut clif,
+            &self.bcx.func,
+        )
+        .unwrap();
+        writeln!(f, "\n{}", clif)
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/toolchain.rs b/compiler/rustc_codegen_cranelift/src/toolchain.rs
new file mode 100644
index 00000000000..b6b465e1f4e
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/toolchain.rs
@@ -0,0 +1,29 @@
+//! Locating the various executables that are part of a C toolchain.
+
+use std::path::PathBuf;
+
+use rustc_codegen_ssa::back::link::linker_and_flavor;
+use rustc_session::Session;
+
+/// Tries to infer the path of a binary for the target toolchain from the linker name.
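+///
+/// For example, with a linker named `aarch64-linux-gnu-gcc` and `tool == "ar"`,
+/// this returns a path ending in `aarch64-linux-gnu-ar`.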
+pub(crate) fn get_toolchain_binary(sess: &Session, tool: &str) -> PathBuf {
+    let (mut linker, _linker_flavor) = linker_and_flavor(sess);
+    let linker_file_name =
+        linker.file_name().unwrap().to_str().expect("linker filename should be valid UTF-8");
+
+    if linker_file_name == "ld.lld" {
+        if tool != "ld" {
+            linker.set_file_name(tool)
+        }
+    } else {
+        let tool_file_name = linker_file_name
+            .replace("ld", tool)
+            .replace("gcc", tool)
+            .replace("clang", tool)
+            .replace("cc", tool);
+
+        linker.set_file_name(tool_file_name)
+    }
+
+    linker
+}
diff --git a/compiler/rustc_codegen_cranelift/src/trap.rs b/compiler/rustc_codegen_cranelift/src/trap.rs
new file mode 100644
index 00000000000..2fb0c2164c3
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/trap.rs
@@ -0,0 +1,38 @@
+//! Helpers used to print a message and abort in case of certain panics and some detected UB.
+
+use crate::prelude::*;
+
+fn codegen_print(fx: &mut FunctionCx<'_, '_, '_>, msg: &str) {
+    let puts = fx
+        .module
+        .declare_function(
+            "puts",
+            Linkage::Import,
+            &Signature {
+                call_conv: fx.target_config.default_call_conv,
+                params: vec![AbiParam::new(fx.pointer_type)],
+                returns: vec![AbiParam::new(types::I32)],
+            },
+        )
+        .unwrap();
+    let puts = fx.module.declare_func_in_func(puts, &mut fx.bcx.func);
+    if fx.clif_comments.enabled() {
+        fx.add_comment(puts, "puts");
+    }
+
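+    // The trailing `\0` makes this a valid C string for `puts`.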
+    let real_msg = format!("trap at {:?} ({}): {}\0", fx.instance, fx.symbol_name, msg);
+    let msg_ptr = fx.anonymous_str(&real_msg);
+    fx.bcx.ins().call(puts, &[msg_ptr]);
+}
+
+/// Use this when something is unimplemented, but `libcore` or `libstd` requires it for codegen to succeed.
+///
+/// Trap code: user65535
+pub(crate) fn trap_unimplemented(fx: &mut FunctionCx<'_, '_, '_>, msg: impl AsRef<str>) {
+    codegen_print(fx, msg.as_ref());
+
+    let one = fx.bcx.ins().iconst(types::I32, 1);
+    fx.lib_call("exit", vec![AbiParam::new(types::I32)], vec![], &[one]);
+
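+    // `exit` never returns; the trap merely terminates the block for Cranelift.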
+    fx.bcx.ins().trap(TrapCode::User(!0));
+}
diff --git a/compiler/rustc_codegen_cranelift/src/unsize.rs b/compiler/rustc_codegen_cranelift/src/unsize.rs
new file mode 100644
index 00000000000..4acbc8a27ed
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/unsize.rs
@@ -0,0 +1,313 @@
+//! Codegen of the [`PointerCoercion::Unsize`] operation.
+//!
+//! [`PointerCoercion::Unsize`]: `rustc_middle::ty::adjustment::PointerCoercion::Unsize`
+
+use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths};
+
+use crate::base::codegen_panic_nounwind;
+use crate::prelude::*;
+
+// Adapted from https://github.com/rust-lang/rust/blob/2a663555ddf36f6b041445894a8c175cd1bc718c/src/librustc_codegen_ssa/base.rs#L159-L307
+
+/// Retrieve the information we are losing (making dynamic) in an unsizing
+/// adjustment.
+///
+/// The `old_info` argument is a bit funny. It is intended for use
+/// in an upcast, where the new vtable for an object will be derived
+/// from the old one.
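+///
+/// For example, unsizing `[u8; 4]` to `[u8]` produces the array length `4` as
+/// the dynamic info, while unsizing to a `dyn Trait` produces a vtable pointer.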
+pub(crate) fn unsized_info<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    source: Ty<'tcx>,
+    target: Ty<'tcx>,
+    old_info: Option<Value>,
+) -> Value {
+    let (source, target) =
+        fx.tcx.struct_lockstep_tails_erasing_lifetimes(source, target, ParamEnv::reveal_all());
+    match (&source.kind(), &target.kind()) {
+        (&ty::Array(_, len), &ty::Slice(_)) => fx
+            .bcx
+            .ins()
+            .iconst(fx.pointer_type, len.eval_target_usize(fx.tcx, ParamEnv::reveal_all()) as i64),
+        (&ty::Dynamic(data_a, _, src_dyn_kind), &ty::Dynamic(data_b, _, target_dyn_kind))
+            if src_dyn_kind == target_dyn_kind =>
+        {
+            let old_info =
+                old_info.expect("unsized_info: missing old info for trait upcasting coercion");
+            if data_a.principal_def_id() == data_b.principal_def_id() {
+                // A no-op cast that doesn't actually change anything; it should be allowed even with invalid vtables.
+                return old_info;
+            }
+
+            // trait upcasting coercion
+            let vptr_entry_idx =
+                fx.tcx.vtable_trait_upcasting_coercion_new_vptr_slot((source, target));
+
+            if let Some(entry_idx) = vptr_entry_idx {
+                let entry_idx = u32::try_from(entry_idx).unwrap();
+                let entry_offset = entry_idx * fx.pointer_type.bytes();
+                let vptr_ptr = Pointer::new(old_info).offset_i64(fx, entry_offset.into()).load(
+                    fx,
+                    fx.pointer_type,
+                    crate::vtable::vtable_memflags(),
+                );
+                vptr_ptr
+            } else {
+                old_info
+            }
+        }
+        (_, ty::Dynamic(data, ..)) => crate::vtable::get_vtable(fx, source, data.principal()),
+        _ => bug!("unsized_info: invalid unsizing {:?} -> {:?}", source, target),
+    }
+}
+
+/// Coerce `src` to `dst_ty`.
+fn unsize_ptr<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    src: Value,
+    src_layout: TyAndLayout<'tcx>,
+    dst_layout: TyAndLayout<'tcx>,
+    old_info: Option<Value>,
+) -> (Value, Value) {
+    match (&src_layout.ty.kind(), &dst_layout.ty.kind()) {
+        (&ty::Ref(_, a, _), &ty::Ref(_, b, _))
+        | (&ty::Ref(_, a, _), &ty::RawPtr(b, _))
+        | (&ty::RawPtr(a, _), &ty::RawPtr(b, _)) => (src, unsized_info(fx, *a, *b, old_info)),
+        (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) => {
+            assert_eq!(def_a, def_b);
+
+            if src_layout == dst_layout {
+                return (src, old_info.unwrap());
+            }
+
+            let mut result = None;
+            for i in 0..src_layout.fields.count() {
+                let src_f = src_layout.field(fx, i);
+                assert_eq!(src_layout.fields.offset(i).bytes(), 0);
+                assert_eq!(dst_layout.fields.offset(i).bytes(), 0);
+                if src_f.is_1zst() {
+                    // We are looking for the one non-1-ZST field; this is not it.
+                    continue;
+                }
+                assert_eq!(src_layout.size, src_f.size);
+
+                let dst_f = dst_layout.field(fx, i);
+                assert_ne!(src_f.ty, dst_f.ty);
+                assert_eq!(result, None);
+                result = Some(unsize_ptr(fx, src, src_f, dst_f, old_info));
+            }
+            result.unwrap()
+        }
+        _ => bug!("unsize_ptr: called on bad types"),
+    }
+}
+
+/// Coerces `src` to `dst_ty`, which is guaranteed to be a `dyn*` type.
+pub(crate) fn cast_to_dyn_star<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    src: Value,
+    src_ty_and_layout: TyAndLayout<'tcx>,
+    dst_ty: Ty<'tcx>,
+    old_info: Option<Value>,
+) -> (Value, Value) {
+    assert!(
+        matches!(dst_ty.kind(), ty::Dynamic(_, _, ty::DynStar)),
+        "destination type must be a dyn*"
+    );
+    (src, unsized_info(fx, src_ty_and_layout.ty, dst_ty, old_info))
+}
+
+/// Coerce `src`, which is a reference to a value of type `src_ty`,
+/// to a value of type `dst_ty` and store the result in `dst`
+pub(crate) fn coerce_unsized_into<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    src: CValue<'tcx>,
+    dst: CPlace<'tcx>,
+) {
+    let src_ty = src.layout().ty;
+    let dst_ty = dst.layout().ty;
+    let mut coerce_ptr = || {
+        let (base, info) =
+            if fx.layout_of(src.layout().ty.builtin_deref(true).unwrap()).is_unsized() {
+                let (old_base, old_info) = src.load_scalar_pair(fx);
+                unsize_ptr(fx, old_base, src.layout(), dst.layout(), Some(old_info))
+            } else {
+                let base = src.load_scalar(fx);
+                unsize_ptr(fx, base, src.layout(), dst.layout(), None)
+            };
+        dst.write_cvalue(fx, CValue::by_val_pair(base, info, dst.layout()));
+    };
+    match (&src_ty.kind(), &dst_ty.kind()) {
+        (&ty::Ref(..), &ty::Ref(..))
+        | (&ty::Ref(..), &ty::RawPtr(..))
+        | (&ty::RawPtr(..), &ty::RawPtr(..)) => coerce_ptr(),
+        (&ty::Adt(def_a, _), &ty::Adt(def_b, _)) => {
+            assert_eq!(def_a, def_b);
+
+            for i in 0..def_a.variant(FIRST_VARIANT).fields.len() {
+                let src_f = src.value_field(fx, FieldIdx::new(i));
+                let dst_f = dst.place_field(fx, FieldIdx::new(i));
+
+                if dst_f.layout().is_zst() {
+                    // No data here, nothing to copy/coerce.
+                    continue;
+                }
+
+                if src_f.layout().ty == dst_f.layout().ty {
+                    dst_f.write_cvalue(fx, src_f);
+                } else {
+                    coerce_unsized_into(fx, src_f, dst_f);
+                }
+            }
+        }
+        _ => bug!("coerce_unsized_into: invalid coercion {:?} -> {:?}", src_ty, dst_ty),
+    }
+}
+
+pub(crate) fn coerce_dyn_star<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    src: CValue<'tcx>,
+    dst: CPlace<'tcx>,
+) {
+    let (data, extra) = if let ty::Dynamic(_, _, ty::DynStar) = src.layout().ty.kind() {
+        let (data, vtable) = src.load_scalar_pair(fx);
+        (data, Some(vtable))
+    } else {
+        let data = src.load_scalar(fx);
+        (data, None)
+    };
+
+    let (data, vtable) = cast_to_dyn_star(fx, data, src.layout(), dst.layout().ty, extra);
+
+    dst.write_cvalue(fx, CValue::by_val_pair(data, vtable, dst.layout()));
+}
+
+// Adapted from https://github.com/rust-lang/rust/blob/2a663555ddf36f6b041445894a8c175cd1bc718c/src/librustc_codegen_ssa/glue.rs
+
+pub(crate) fn size_and_align_of<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    layout: TyAndLayout<'tcx>,
+    info: Option<Value>,
+) -> (Value, Value) {
+    if layout.is_sized() {
+        return (
+            fx.bcx.ins().iconst(fx.pointer_type, layout.size.bytes() as i64),
+            fx.bcx.ins().iconst(fx.pointer_type, layout.align.abi.bytes() as i64),
+        );
+    }
+
+    let ty = layout.ty;
+    match ty.kind() {
+        ty::Dynamic(..) => {
+            // load size/align from vtable
+            (
+                crate::vtable::size_of_obj(fx, info.unwrap()),
+                crate::vtable::min_align_of_obj(fx, info.unwrap()),
+            )
+        }
+        ty::Slice(_) | ty::Str => {
+            let unit = layout.field(fx, 0);
+            // The info in this case is the length of the str, so the size is that
+            // times the unit size.
+            (
+                fx.bcx.ins().imul_imm(info.unwrap(), unit.size.bytes() as i64),
+                fx.bcx.ins().iconst(fx.pointer_type, unit.align.abi.bytes() as i64),
+            )
+        }
+        ty::Foreign(_) => {
+            let trap_block = fx.bcx.create_block();
+            let true_ = fx.bcx.ins().iconst(types::I8, 1);
+            let next_block = fx.bcx.create_block();
+            fx.bcx.ins().brif(true_, trap_block, &[], next_block, &[]);
+            fx.bcx.seal_block(trap_block);
+            fx.bcx.seal_block(next_block);
+            fx.bcx.switch_to_block(trap_block);
+
+            // `extern` type. We cannot compute the size, so panic.
+            let msg_str = with_no_visible_paths!({
+                with_no_trimmed_paths!({
+                    format!("attempted to compute the size or alignment of extern type `{ty}`")
+                })
+            });
+
+            codegen_panic_nounwind(fx, &msg_str, None);
+
+            fx.bcx.switch_to_block(next_block);
+
+            // `codegen_panic_nounwind` does not return, so we can now return whatever we want.
+            let size = fx.bcx.ins().iconst(fx.pointer_type, 42);
+            let align = fx.bcx.ins().iconst(fx.pointer_type, 42);
+            (size, align)
+        }
+        ty::Adt(..) | ty::Tuple(..) => {
+            // First get the size of all statically known fields.
+            // Don't use size_of because it also rounds up to alignment, which we
+            // want to avoid, as the unsized field's alignment could be smaller.
+            assert!(!layout.ty.is_simd());
+
+            let i = layout.fields.count() - 1;
+            let unsized_offset_unadjusted = layout.fields.offset(i).bytes();
+            let unsized_offset_unadjusted =
+                fx.bcx.ins().iconst(fx.pointer_type, unsized_offset_unadjusted as i64);
+            let sized_align = layout.align.abi.bytes();
+            let sized_align = fx.bcx.ins().iconst(fx.pointer_type, sized_align as i64);
+
+            // Recurse to get the size of the dynamically sized field (must be
+            // the last field).
+            let field_layout = layout.field(fx, i);
+            let (unsized_size, mut unsized_align) = size_and_align_of(fx, field_layout, info);
+
+            // # First compute the dynamic alignment
+
+            // For packed types, we need to cap the alignment.
+            if let ty::Adt(def, _) = ty.kind() {
+                if let Some(packed) = def.repr().pack {
+                    if packed.bytes() == 1 {
+                        // We know this will be capped to 1.
+                        unsized_align = fx.bcx.ins().iconst(fx.pointer_type, 1);
+                    } else {
+                        // We have to dynamically compute `min(unsized_align, packed)`.
+                        let packed = fx.bcx.ins().iconst(fx.pointer_type, packed.bytes() as i64);
+                        let cmp = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, unsized_align, packed);
+                        unsized_align = fx.bcx.ins().select(cmp, unsized_align, packed);
+                    }
+                }
+            }
+
+            // Choose max of two known alignments (combined value must
+            // be aligned according to more restrictive of the two).
+            let cmp = fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, sized_align, unsized_align);
+            let full_align = fx.bcx.ins().select(cmp, sized_align, unsized_align);
+
+            // # Then compute the dynamic size
+
+            // The full formula for the size would be:
+            // let unsized_offset_adjusted = unsized_offset_unadjusted.align_to(unsized_align);
+            // let full_size = (unsized_offset_adjusted + unsized_size).align_to(full_align);
+            // However, `unsized_size` is a multiple of `unsized_align`.
+            // Therefore, we can equivalently do the `align_to(unsized_align)` *after* adding `unsized_size`:
+            // let full_size = (unsized_offset_unadjusted + unsized_size).align_to(unsized_align).align_to(full_align);
+            // Furthermore, `full_align >= unsized_align`, and therefore we only need to do:
+            // let full_size = (unsized_offset_unadjusted + unsized_size).align_to(full_align);
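+            //
+            // For example (hypothetical layout): for `struct S { a: u8, tail: [u32] }`
+            // with a tail of length 3, unsized_offset_unadjusted = 4, unsized_size = 12
+            // and full_align = 4, giving full_size = (4 + 12).align_to(4) = 16.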
+
+            let full_size = fx.bcx.ins().iadd(unsized_offset_unadjusted, unsized_size);
+
+            // Issue #27023: must add any necessary padding to `size`
+            // (to make it a multiple of `align`) before returning it.
+            //
+            // Namely, the returned size should be, in C notation:
+            //
+            //   `size + ((size & (align-1)) ? align : 0)`
+            //
+            // emulated via the semi-standard fast bit trick:
+            //
+            //   `(size + (align-1)) & -align`
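+            //
+            // For example, size = 10 and align = 4 gives (10 + 3) & -4 = 12.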
+            let addend = fx.bcx.ins().iadd_imm(full_align, -1);
+            let add = fx.bcx.ins().iadd(full_size, addend);
+            let neg = fx.bcx.ins().ineg(full_align);
+            let full_size = fx.bcx.ins().band(add, neg);
+
+            (full_size, full_align)
+        }
+        _ => bug!("size_and_align_of_dst: {ty} not supported"),
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
new file mode 100644
index 00000000000..b6d6d211e65
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -0,0 +1,1005 @@
+//! Definition of [`CValue`] and [`CPlace`]
+
+use cranelift_codegen::entity::EntityRef;
+use cranelift_codegen::ir::immediates::Offset32;
+use cranelift_frontend::Variable;
+use rustc_middle::ty::FnSig;
+
+use crate::prelude::*;
+
+fn codegen_field<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    base: Pointer,
+    extra: Option<Value>,
+    layout: TyAndLayout<'tcx>,
+    field: FieldIdx,
+) -> (Pointer, TyAndLayout<'tcx>) {
+    let field_offset = layout.fields.offset(field.index());
+    let field_layout = layout.field(&*fx, field.index());
+
+    let simple = |fx: &mut FunctionCx<'_, '_, '_>| {
+        (base.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap()), field_layout)
+    };
+
+    if field_layout.is_sized() {
+        return simple(fx);
+    }
+    match field_layout.ty.kind() {
+        ty::Slice(..) | ty::Str => simple(fx),
+        _ => {
+            let unaligned_offset = field_offset.bytes();
+
+            // Get the alignment of the field
+            let (_, mut unsized_align) = crate::unsize::size_and_align_of(fx, field_layout, extra);
+
+            // For packed types, we need to cap alignment.
+            if let ty::Adt(def, _) = layout.ty.kind() {
+                if let Some(packed) = def.repr().pack {
+                    let packed = fx.bcx.ins().iconst(fx.pointer_type, packed.bytes() as i64);
+                    let cmp = fx.bcx.ins().icmp(IntCC::UnsignedLessThan, unsized_align, packed);
+                    unsized_align = fx.bcx.ins().select(cmp, unsized_align, packed);
+                }
+            }
+
+            // Bump the unaligned offset up to the appropriate alignment
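+            // using the standard `(offset + align - 1) & -align` rounding-up trick.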
+            let one = fx.bcx.ins().iconst(fx.pointer_type, 1);
+            let align_sub_1 = fx.bcx.ins().isub(unsized_align, one);
+            let and_lhs = fx.bcx.ins().iadd_imm(align_sub_1, unaligned_offset as i64);
+            let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
+            let and_rhs = fx.bcx.ins().isub(zero, unsized_align);
+            let offset = fx.bcx.ins().band(and_lhs, and_rhs);
+
+            (base.offset_value(fx, offset), field_layout)
+        }
+    }
+}
+
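+/// Offset of the second scalar in a `ScalarPair`: the size of the first scalar
+/// rounded up to the ABI alignment of the second. For example, for `(u8, u32)`
+/// the second scalar lives at offset 4.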
+fn scalar_pair_calculate_b_offset(tcx: TyCtxt<'_>, a_scalar: Scalar, b_scalar: Scalar) -> Offset32 {
+    let b_offset = a_scalar.size(&tcx).align_to(b_scalar.align(&tcx).abi);
+    Offset32::new(b_offset.bytes().try_into().unwrap())
+}
+
+/// A read-only value
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct CValue<'tcx>(CValueInner, TyAndLayout<'tcx>);
+
+#[derive(Debug, Copy, Clone)]
+enum CValueInner {
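+    /// The value is stored in memory, with optional metadata for unsized types.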
+    ByRef(Pointer, Option<Value>),
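+    /// The value fits in a single SSA value.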
+    ByVal(Value),
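+    /// The value is a pair of SSA values, e.g. for `ScalarPair` layouts like fat pointers.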
+    ByValPair(Value, Value),
+}
+
+impl<'tcx> CValue<'tcx> {
+    pub(crate) fn by_ref(ptr: Pointer, layout: TyAndLayout<'tcx>) -> CValue<'tcx> {
+        CValue(CValueInner::ByRef(ptr, None), layout)
+    }
+
+    pub(crate) fn by_ref_unsized(
+        ptr: Pointer,
+        meta: Value,
+        layout: TyAndLayout<'tcx>,
+    ) -> CValue<'tcx> {
+        CValue(CValueInner::ByRef(ptr, Some(meta)), layout)
+    }
+
+    pub(crate) fn by_val(value: Value, layout: TyAndLayout<'tcx>) -> CValue<'tcx> {
+        CValue(CValueInner::ByVal(value), layout)
+    }
+
+    pub(crate) fn by_val_pair(
+        value: Value,
+        extra: Value,
+        layout: TyAndLayout<'tcx>,
+    ) -> CValue<'tcx> {
+        CValue(CValueInner::ByValPair(value, extra), layout)
+    }
+
+    pub(crate) fn layout(&self) -> TyAndLayout<'tcx> {
+        self.1
+    }
+
+    // FIXME remove
+    pub(crate) fn force_stack(self, fx: &mut FunctionCx<'_, '_, 'tcx>) -> (Pointer, Option<Value>) {
+        let layout = self.1;
+        match self.0 {
+            CValueInner::ByRef(ptr, meta) => (ptr, meta),
+            CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => {
+                let cplace = CPlace::new_stack_slot(fx, layout);
+                cplace.write_cvalue(fx, self);
+                (cplace.to_ptr(), None)
+            }
+        }
+    }
+
+    // FIXME remove
+    /// Forces the data value of a dyn* value to the stack and returns a pointer to it as well as the
+    /// vtable pointer.
+    pub(crate) fn dyn_star_force_data_on_stack(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+    ) -> (Value, Value) {
+        assert!(self.1.ty.is_dyn_star());
+
+        match self.0 {
+            CValueInner::ByRef(ptr, None) => {
+                let (a_scalar, b_scalar) = match self.1.abi {
+                    Abi::ScalarPair(a, b) => (a, b),
+                    _ => unreachable!("dyn_star_force_data_on_stack({:?})", self),
+                };
+                let b_offset = scalar_pair_calculate_b_offset(fx.tcx, a_scalar, b_scalar);
+                let clif_ty2 = scalar_to_clif_type(fx.tcx, b_scalar);
+                let mut flags = MemFlags::new();
+                flags.set_notrap();
+                let vtable = ptr.offset(fx, b_offset).load(fx, clif_ty2, flags);
+                (ptr.get_addr(fx), vtable)
+            }
+            CValueInner::ByValPair(data, vtable) => {
+                let data_ptr = fx.create_stack_slot(
+                    u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(),
+                    u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(),
+                );
+                data_ptr.store(fx, data, MemFlags::trusted());
+
+                (data_ptr.get_addr(fx), vtable)
+            }
+            CValueInner::ByRef(_, Some(_)) | CValueInner::ByVal(_) => {
+                unreachable!("dyn_star_force_data_on_stack({:?})", self)
+            }
+        }
+    }
+
+    pub(crate) fn try_to_ptr(self) -> Option<(Pointer, Option<Value>)> {
+        match self.0 {
+            CValueInner::ByRef(ptr, meta) => Some((ptr, meta)),
+            CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => None,
+        }
+    }
+
+    /// Load a value with a `layout.abi` of `Abi::Scalar` or `Abi::Vector`.
+    #[track_caller]
+    pub(crate) fn load_scalar(self, fx: &mut FunctionCx<'_, '_, 'tcx>) -> Value {
+        let layout = self.1;
+        match self.0 {
+            CValueInner::ByRef(ptr, None) => {
+                let clif_ty = match layout.abi {
+                    Abi::Scalar(scalar) => scalar_to_clif_type(fx.tcx, scalar),
+                    Abi::Vector { element, count } => scalar_to_clif_type(fx.tcx, element)
+                        .by(u32::try_from(count).unwrap())
+                        .unwrap(),
+                    _ => unreachable!("{:?}", layout.ty),
+                };
+                let mut flags = MemFlags::new();
+                flags.set_notrap();
+                ptr.load(fx, clif_ty, flags)
+            }
+            CValueInner::ByVal(value) => value,
+            CValueInner::ByRef(_, Some(_)) => bug!("load_scalar for unsized value not allowed"),
+            CValueInner::ByValPair(_, _) => bug!("Please use load_scalar_pair for ByValPair"),
+        }
+    }
+
+    /// Load a pair of values with a `layout.abi` of `Abi::ScalarPair`.
+    #[track_caller]
+    pub(crate) fn load_scalar_pair(self, fx: &mut FunctionCx<'_, '_, 'tcx>) -> (Value, Value) {
+        let layout = self.1;
+        match self.0 {
+            CValueInner::ByRef(ptr, None) => {
+                let (a_scalar, b_scalar) = match layout.abi {
+                    Abi::ScalarPair(a, b) => (a, b),
+                    _ => unreachable!("load_scalar_pair({:?})", self),
+                };
+                let b_offset = scalar_pair_calculate_b_offset(fx.tcx, a_scalar, b_scalar);
+                let clif_ty1 = scalar_to_clif_type(fx.tcx, a_scalar);
+                let clif_ty2 = scalar_to_clif_type(fx.tcx, b_scalar);
+                let mut flags = MemFlags::new();
+                flags.set_notrap();
+                let val1 = ptr.load(fx, clif_ty1, flags);
+                let val2 = ptr.offset(fx, b_offset).load(fx, clif_ty2, flags);
+                (val1, val2)
+            }
+            CValueInner::ByRef(_, Some(_)) => {
+                bug!("load_scalar_pair for unsized value not allowed")
+            }
+            CValueInner::ByVal(_) => bug!("Please use load_scalar for ByVal"),
+            CValueInner::ByValPair(val1, val2) => (val1, val2),
+        }
+    }
+
+    pub(crate) fn value_field(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        field: FieldIdx,
+    ) -> CValue<'tcx> {
+        let layout = self.1;
+        match self.0 {
+            CValueInner::ByVal(_) => unreachable!(),
+            CValueInner::ByValPair(val1, val2) => match layout.abi {
+                Abi::ScalarPair(_, _) => {
+                    let val = match field.as_u32() {
+                        0 => val1,
+                        1 => val2,
+                        _ => bug!("field should be 0 or 1"),
+                    };
+                    let field_layout = layout.field(&*fx, usize::from(field));
+                    CValue::by_val(val, field_layout)
+                }
+                _ => unreachable!("value_field for ByValPair with abi {:?}", layout.abi),
+            },
+            CValueInner::ByRef(ptr, None) => {
+                let (field_ptr, field_layout) = codegen_field(fx, ptr, None, layout, field);
+                CValue::by_ref(field_ptr, field_layout)
+            }
+            CValueInner::ByRef(_, Some(_)) => todo!(),
+        }
+    }
+
+    /// Like [`CValue::value_field`] except handling ADTs containing a single array field in a way
+    /// such that you can access individual lanes.
+    pub(crate) fn value_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: u64,
+    ) -> CValue<'tcx> {
+        let layout = self.1;
+        assert!(layout.ty.is_simd());
+        let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(lane_idx < lane_count);
+
+        match self.0 {
+            CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
+            CValueInner::ByRef(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CValue::by_ref(field_ptr, lane_layout)
+            }
+            CValueInner::ByRef(_, Some(_)) => unreachable!(),
+        }
+    }
+
+    /// Like [`CValue::value_lane`] except using the passed type as the lane type
+    /// instead of the one specified by the vector type.
+    pub(crate) fn value_typed_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_ty: Ty<'tcx>,
+        lane_idx: u64,
+    ) -> CValue<'tcx> {
+        let layout = self.1;
+        assert!(layout.ty.is_simd());
+        let (orig_lane_count, orig_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(
+            (lane_idx + 1) * lane_layout.size <= orig_lane_count * fx.layout_of(orig_lane_ty).size
+        );
+
+        match self.0 {
+            CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
+            CValueInner::ByRef(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CValue::by_ref(field_ptr, lane_layout)
+            }
+            CValueInner::ByRef(_, Some(_)) => unreachable!(),
+        }
+    }
+
+    /// Like [`CValue::value_lane`] except allowing a dynamically calculated lane index.
+    pub(crate) fn value_lane_dyn(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: Value,
+    ) -> CValue<'tcx> {
+        let layout = self.1;
+        assert!(layout.ty.is_simd());
+        let (_lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        match self.0 {
+            CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(),
+            CValueInner::ByRef(ptr, None) => {
+                let field_offset = fx.bcx.ins().imul_imm(lane_idx, lane_layout.size.bytes() as i64);
+                let field_ptr = ptr.offset_value(fx, field_offset);
+                CValue::by_ref(field_ptr, lane_layout)
+            }
+            CValueInner::ByRef(_, Some(_)) => unreachable!(),
+        }
+    }
+
+    /// If `ty` is signed, `const_val` must already be sign extended.
+    pub(crate) fn const_val(
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        layout: TyAndLayout<'tcx>,
+        const_val: ty::ScalarInt,
+    ) -> CValue<'tcx> {
+        assert_eq!(const_val.size(), layout.size, "{:#?}: {:?}", const_val, layout);
+        use cranelift_codegen::ir::immediates::{Ieee32, Ieee64};
+
+        let clif_ty = fx.clif_type(layout.ty).unwrap();
+
+        let val = match layout.ty.kind() {
+            ty::Uint(UintTy::U128) | ty::Int(IntTy::I128) => {
+                let const_val = const_val.assert_bits(layout.size);
+                let lsb = fx.bcx.ins().iconst(types::I64, const_val as u64 as i64);
+                let msb = fx.bcx.ins().iconst(types::I64, (const_val >> 64) as u64 as i64);
+                fx.bcx.ins().iconcat(lsb, msb)
+            }
+            ty::Bool
+            | ty::Char
+            | ty::Uint(_)
+            | ty::Int(_)
+            | ty::Ref(..)
+            | ty::RawPtr(..)
+            | ty::FnPtr(..) => {
+                let raw_val = const_val.size().truncate(const_val.assert_bits(layout.size));
+                fx.bcx.ins().iconst(clif_ty, raw_val as i64)
+            }
+            ty::Float(FloatTy::F32) => {
+                fx.bcx.ins().f32const(Ieee32::with_bits(u32::try_from(const_val).unwrap()))
+            }
+            ty::Float(FloatTy::F64) => {
+                fx.bcx.ins().f64const(Ieee64::with_bits(u64::try_from(const_val).unwrap()))
+            }
+            _ => panic!(
+                "CValue::const_val for non bool/char/float/integer/pointer type {:?} is not allowed",
+                layout.ty
+            ),
+        };
+
+        CValue::by_val(val, layout)
+    }
+
+    pub(crate) fn cast_pointer_to(self, layout: TyAndLayout<'tcx>) -> Self {
+        assert!(matches!(self.layout().ty.kind(), ty::Ref(..) | ty::RawPtr(..) | ty::FnPtr(..)));
+        assert!(matches!(layout.ty.kind(), ty::Ref(..) | ty::RawPtr(..) | ty::FnPtr(..)));
+        assert_eq!(self.layout().abi, layout.abi);
+        CValue(self.0, layout)
+    }
+}
+
+/// A place where you can write a value to or read a value from
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct CPlace<'tcx> {
+    inner: CPlaceInner,
+    layout: TyAndLayout<'tcx>,
+}
+
+#[derive(Debug, Copy, Clone)]
+enum CPlaceInner {
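+    /// A single SSA variable backing the given MIR local.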
+    Var(Local, Variable),
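+    /// A pair of SSA variables backing the given MIR local, used for `ScalarPair` layouts.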
+    VarPair(Local, Variable, Variable),
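+    /// A memory location, with optional metadata for unsized places.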
+    Addr(Pointer, Option<Value>),
+}
+
+impl<'tcx> CPlace<'tcx> {
+    pub(crate) fn layout(&self) -> TyAndLayout<'tcx> {
+        self.layout
+    }
+
+    pub(crate) fn new_stack_slot(
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        layout: TyAndLayout<'tcx>,
+    ) -> CPlace<'tcx> {
+        assert!(layout.is_sized());
+        if layout.size.bytes() == 0 {
+            return CPlace {
+                inner: CPlaceInner::Addr(Pointer::dangling(layout.align.pref), None),
+                layout,
+            };
+        }
+
+        if layout.size.bytes() >= u64::from(u32::MAX - 16) {
+            fx.tcx
+                .dcx()
+                .fatal(format!("values of type {} are too big to store on the stack", layout.ty));
+        }
+
+        let stack_slot = fx.create_stack_slot(
+            u32::try_from(layout.size.bytes()).unwrap(),
+            u32::try_from(layout.align.pref.bytes()).unwrap(),
+        );
+        CPlace { inner: CPlaceInner::Addr(stack_slot, None), layout }
+    }
+
+    pub(crate) fn new_var(
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        local: Local,
+        layout: TyAndLayout<'tcx>,
+    ) -> CPlace<'tcx> {
+        let var = Variable::from_u32(fx.next_ssa_var);
+        fx.next_ssa_var += 1;
+        fx.bcx.declare_var(var, fx.clif_type(layout.ty).unwrap());
+        CPlace { inner: CPlaceInner::Var(local, var), layout }
+    }
+
+    pub(crate) fn new_var_pair(
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        local: Local,
+        layout: TyAndLayout<'tcx>,
+    ) -> CPlace<'tcx> {
+        let var1 = Variable::from_u32(fx.next_ssa_var);
+        fx.next_ssa_var += 1;
+        let var2 = Variable::from_u32(fx.next_ssa_var);
+        fx.next_ssa_var += 1;
+
+        let (ty1, ty2) = fx.clif_pair_type(layout.ty).unwrap();
+        fx.bcx.declare_var(var1, ty1);
+        fx.bcx.declare_var(var2, ty2);
+        CPlace { inner: CPlaceInner::VarPair(local, var1, var2), layout }
+    }
+
+    pub(crate) fn for_ptr(ptr: Pointer, layout: TyAndLayout<'tcx>) -> CPlace<'tcx> {
+        CPlace { inner: CPlaceInner::Addr(ptr, None), layout }
+    }
+
+    pub(crate) fn for_ptr_with_extra(
+        ptr: Pointer,
+        extra: Value,
+        layout: TyAndLayout<'tcx>,
+    ) -> CPlace<'tcx> {
+        CPlace { inner: CPlaceInner::Addr(ptr, Some(extra)), layout }
+    }
+
+    pub(crate) fn to_cvalue(self, fx: &mut FunctionCx<'_, '_, 'tcx>) -> CValue<'tcx> {
+        let layout = self.layout();
+        match self.inner {
+            CPlaceInner::Var(_local, var) => {
+                let val = fx.bcx.use_var(var);
+                //fx.bcx.set_val_label(val, cranelift_codegen::ir::ValueLabel::new(var.index()));
+                CValue::by_val(val, layout)
+            }
+            CPlaceInner::VarPair(_local, var1, var2) => {
+                let val1 = fx.bcx.use_var(var1);
+                //fx.bcx.set_val_label(val1, cranelift_codegen::ir::ValueLabel::new(var1.index()));
+                let val2 = fx.bcx.use_var(var2);
+                //fx.bcx.set_val_label(val2, cranelift_codegen::ir::ValueLabel::new(var2.index()));
+                CValue::by_val_pair(val1, val2, layout)
+            }
+            CPlaceInner::Addr(ptr, extra) => {
+                if let Some(extra) = extra {
+                    CValue::by_ref_unsized(ptr, extra, layout)
+                } else {
+                    CValue::by_ref(ptr, layout)
+                }
+            }
+        }
+    }
+
+    pub(crate) fn debug_comment(self) -> (&'static str, String) {
+        match self.inner {
+            CPlaceInner::Var(_local, var) => ("ssa", format!("var={}", var.index())),
+            CPlaceInner::VarPair(_local, var1, var2) => {
+                ("ssa", format!("var=({}, {})", var1.index(), var2.index()))
+            }
+            CPlaceInner::Addr(ptr, meta) => {
+                let meta =
+                    if let Some(meta) = meta { format!(",meta={}", meta) } else { String::new() };
+                match ptr.debug_base_and_offset() {
+                    (crate::pointer::PointerBase::Addr(addr), offset) => {
+                        ("reuse", format!("storage={}{}{}", addr, offset, meta))
+                    }
+                    (crate::pointer::PointerBase::Stack(stack_slot), offset) => {
+                        ("stack", format!("storage={}{}{}", stack_slot, offset, meta))
+                    }
+                    (crate::pointer::PointerBase::Dangling(align), offset) => {
+                        ("zst", format!("align={},offset={}", align.bytes(), offset))
+                    }
+                }
+            }
+        }
+    }
+
+    #[track_caller]
+    pub(crate) fn to_ptr(self) -> Pointer {
+        match self.inner {
+            CPlaceInner::Addr(ptr, None) => ptr,
+            CPlaceInner::Addr(_, Some(_)) => bug!("Expected sized cplace, found {:?}", self),
+            CPlaceInner::Var(_, _) | CPlaceInner::VarPair(_, _, _) => {
+                bug!("Expected CPlace::Addr, found {:?}", self)
+            }
+        }
+    }
+
+    #[track_caller]
+    pub(crate) fn to_ptr_unsized(self) -> (Pointer, Value) {
+        match self.inner {
+            CPlaceInner::Addr(ptr, Some(extra)) => (ptr, extra),
+            CPlaceInner::Addr(_, None) | CPlaceInner::Var(_, _) | CPlaceInner::VarPair(_, _, _) => {
+                bug!("Expected unsized cplace, found {:?}", self)
+            }
+        }
+    }
+
+    pub(crate) fn try_to_ptr(self) -> Option<Pointer> {
+        match self.inner {
+            CPlaceInner::Var(_, _) | CPlaceInner::VarPair(_, _, _) => None,
+            CPlaceInner::Addr(ptr, None) => Some(ptr),
+            CPlaceInner::Addr(_, Some(_)) => bug!("Expected sized cplace, found {:?}", self),
+        }
+    }
+
+    pub(crate) fn write_cvalue(self, fx: &mut FunctionCx<'_, '_, 'tcx>, from: CValue<'tcx>) {
+        assert_assignable(fx, from.layout().ty, self.layout().ty, 16);
+
+        self.write_cvalue_maybe_transmute(fx, from, "write_cvalue");
+    }
+
+    pub(crate) fn write_cvalue_transmute(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        from: CValue<'tcx>,
+    ) {
+        self.write_cvalue_maybe_transmute(fx, from, "write_cvalue_transmute");
+    }
+
+    fn write_cvalue_maybe_transmute(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        from: CValue<'tcx>,
+        method: &'static str,
+    ) {
+        fn transmute_scalar<'tcx>(
+            fx: &mut FunctionCx<'_, '_, 'tcx>,
+            var: Variable,
+            data: Value,
+            dst_ty: Type,
+        ) {
+            let src_ty = fx.bcx.func.dfg.value_type(data);
+            assert_eq!(
+                src_ty.bytes(),
+                dst_ty.bytes(),
+                "write_cvalue_transmute: {:?} -> {:?}",
+                src_ty,
+                dst_ty,
+            );
+            let data = match (src_ty, dst_ty) {
+                (_, _) if src_ty == dst_ty => data,
+
+                // This is a `write_cvalue_transmute`.
+                (types::I32, types::F32)
+                | (types::F32, types::I32)
+                | (types::I64, types::F64)
+                | (types::F64, types::I64) => codegen_bitcast(fx, dst_ty, data),
+                _ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data),
+                _ if src_ty.is_vector() || dst_ty.is_vector() => {
+                    // FIXME(bytecodealliance/wasmtime#6104) do something more efficient for transmutes between vectors and integers.
+                    let ptr = fx.create_stack_slot(src_ty.bytes(), src_ty.bytes());
+                    ptr.store(fx, data, MemFlags::trusted());
+                    ptr.load(fx, dst_ty, MemFlags::trusted())
+                }
+
+                // `CValue`s should never contain SSA-only types, so if you ended
+                // up here having seen an error like `B1 -> I8`, then before
+                // calling `write_cvalue` you need to add a `bint` instruction.
+                _ => unreachable!("write_cvalue_transmute: {:?} -> {:?}", src_ty, dst_ty),
+            };
+            //fx.bcx.set_val_label(data, cranelift_codegen::ir::ValueLabel::new(var.index()));
+            fx.bcx.def_var(var, data);
+        }
+
+        assert_eq!(self.layout().size, from.layout().size);
+
+        if fx.clif_comments.enabled() {
+            use cranelift_codegen::cursor::{Cursor, CursorPosition};
+            let cur_block = match fx.bcx.cursor().position() {
+                CursorPosition::After(block) => block,
+                _ => unreachable!(),
+            };
+            fx.add_comment(
+                fx.bcx.func.layout.last_inst(cur_block).unwrap(),
+                format!(
+                    "{}: {:?}: {:?} <- {:?}: {:?}",
+                    method,
+                    self.inner,
+                    self.layout().ty,
+                    from.0,
+                    from.layout().ty
+                ),
+            );
+        }
+
+        let dst_layout = self.layout();
+        match self.inner {
+            CPlaceInner::Var(_local, var) => {
+                let data = match from.1.abi {
+                    Abi::Scalar(_) => CValue(from.0, dst_layout).load_scalar(fx),
+                    _ => {
+                        let (ptr, meta) = from.force_stack(fx);
+                        assert!(meta.is_none());
+                        CValue(CValueInner::ByRef(ptr, None), dst_layout).load_scalar(fx)
+                    }
+                };
+                let dst_ty = fx.clif_type(self.layout().ty).unwrap();
+                transmute_scalar(fx, var, data, dst_ty);
+            }
+            CPlaceInner::VarPair(_local, var1, var2) => {
+                let (data1, data2) = match from.1.abi {
+                    Abi::ScalarPair(_, _) => CValue(from.0, dst_layout).load_scalar_pair(fx),
+                    _ => {
+                        let (ptr, meta) = from.force_stack(fx);
+                        assert!(meta.is_none());
+                        CValue(CValueInner::ByRef(ptr, None), dst_layout).load_scalar_pair(fx)
+                    }
+                };
+                let (dst_ty1, dst_ty2) = fx.clif_pair_type(self.layout().ty).unwrap();
+                transmute_scalar(fx, var1, data1, dst_ty1);
+                transmute_scalar(fx, var2, data2, dst_ty2);
+            }
+            CPlaceInner::Addr(_, Some(_)) => bug!("Can't write value to unsized place {:?}", self),
+            CPlaceInner::Addr(to_ptr, None) => {
+                if dst_layout.size == Size::ZERO || dst_layout.abi == Abi::Uninhabited {
+                    return;
+                }
+
+                let mut flags = MemFlags::new();
+                flags.set_notrap();
+
+                match from.0 {
+                    CValueInner::ByVal(val) => {
+                        to_ptr.store(fx, val, flags);
+                    }
+                    CValueInner::ByValPair(val1, val2) => match from.layout().abi {
+                        Abi::ScalarPair(a_scalar, b_scalar) => {
+                            let b_offset =
+                                scalar_pair_calculate_b_offset(fx.tcx, a_scalar, b_scalar);
+                            to_ptr.store(fx, val1, flags);
+                            to_ptr.offset(fx, b_offset).store(fx, val2, flags);
+                        }
+                        _ => bug!("Non ScalarPair abi {:?} for ByValPair CValue", dst_layout.abi),
+                    },
+                    CValueInner::ByRef(from_ptr, None) => {
+                        match from.layout().abi {
+                            Abi::Scalar(_) => {
+                                let val = from.load_scalar(fx);
+                                to_ptr.store(fx, val, flags);
+                                return;
+                            }
+                            Abi::ScalarPair(a_scalar, b_scalar) => {
+                                let b_offset =
+                                    scalar_pair_calculate_b_offset(fx.tcx, a_scalar, b_scalar);
+                                let (val1, val2) = from.load_scalar_pair(fx);
+                                to_ptr.store(fx, val1, flags);
+                                to_ptr.offset(fx, b_offset).store(fx, val2, flags);
+                                return;
+                            }
+                            _ => {}
+                        }
+
+                        let from_addr = from_ptr.get_addr(fx);
+                        let to_addr = to_ptr.get_addr(fx);
+                        let src_layout = from.1;
+                        let size = dst_layout.size.bytes();
+                        let src_align = src_layout.align.abi.bytes() as u8;
+                        let dst_align = dst_layout.align.abi.bytes() as u8;
+                        fx.bcx.emit_small_memory_copy(
+                            fx.target_config,
+                            to_addr,
+                            from_addr,
+                            size,
+                            dst_align,
+                            src_align,
+                            true,
+                            flags,
+                        );
+                    }
+                    CValueInner::ByRef(_, Some(_)) => todo!(),
+                }
+            }
+        }
+    }
+
+    /// Used for `ProjectionElem::Subtype`; `ty` has to be monomorphized before
+    /// being passed on.
+    pub(crate) fn place_transmute_type(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        ty: Ty<'tcx>,
+    ) -> CPlace<'tcx> {
+        CPlace { inner: self.inner, layout: fx.layout_of(ty) }
+    }
+
+    pub(crate) fn place_field(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        field: FieldIdx,
+    ) -> CPlace<'tcx> {
+        let layout = self.layout();
+
+        match self.inner {
+            CPlaceInner::VarPair(local, var1, var2) => {
+                let layout = layout.field(&*fx, field.index());
+
+                match field.as_u32() {
+                    0 => return CPlace { inner: CPlaceInner::Var(local, var1), layout },
+                    1 => return CPlace { inner: CPlaceInner::Var(local, var2), layout },
+                    _ => unreachable!("field should be 0 or 1"),
+                }
+            }
+            _ => {}
+        }
+
+        let (base, extra) = match self.inner {
+            CPlaceInner::Addr(ptr, extra) => (ptr, extra),
+            CPlaceInner::Var(_, _) | CPlaceInner::VarPair(_, _, _) => {
+                bug!("Expected CPlace::Addr, found {:?}", self)
+            }
+        };
+
+        let (field_ptr, field_layout) = codegen_field(fx, base, extra, layout, field);
+        if has_ptr_meta(fx.tcx, field_layout.ty) {
+            CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout)
+        } else {
+            CPlace::for_ptr(field_ptr, field_layout)
+        }
+    }
+
+    /// Like [`CPlace::place_field`] except handling ADTs containing a single array field in a way
+    /// such that you can access individual lanes.
+    pub(crate) fn place_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_idx: u64,
+    ) -> CPlace<'tcx> {
+        let layout = self.layout();
+        assert!(layout.ty.is_simd());
+        let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(lane_idx < lane_count);
+
+        match self.inner {
+            CPlaceInner::Var(_, _) => unreachable!(),
+            CPlaceInner::VarPair(_, _, _) => unreachable!(),
+            CPlaceInner::Addr(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CPlace::for_ptr(field_ptr, lane_layout)
+            }
+            CPlaceInner::Addr(_, Some(_)) => unreachable!(),
+        }
+    }
+
+    /// Like [`CPlace::place_lane`] except using the passed type as the lane type
+    /// instead of the one specified by the vector type.
+    pub(crate) fn place_typed_lane(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        lane_ty: Ty<'tcx>,
+        lane_idx: u64,
+    ) -> CPlace<'tcx> {
+        let layout = self.layout();
+        assert!(layout.ty.is_simd());
+        let (orig_lane_count, orig_lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+        let lane_layout = fx.layout_of(lane_ty);
+        assert!(
+            (lane_idx + 1) * lane_layout.size <= orig_lane_count * fx.layout_of(orig_lane_ty).size
+        );
+
+        match self.inner {
+            CPlaceInner::Var(_, _) => unreachable!(),
+            CPlaceInner::VarPair(_, _, _) => unreachable!(),
+            CPlaceInner::Addr(ptr, None) => {
+                let field_offset = lane_layout.size * lane_idx;
+                let field_ptr = ptr.offset_i64(fx, i64::try_from(field_offset.bytes()).unwrap());
+                CPlace::for_ptr(field_ptr, lane_layout)
+            }
+            CPlaceInner::Addr(_, Some(_)) => unreachable!(),
+        }
+    }
+
+    pub(crate) fn place_index(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        index: Value,
+    ) -> CPlace<'tcx> {
+        let (elem_layout, ptr) = match self.layout().ty.kind() {
+            ty::Array(elem_ty, _) => {
+                let elem_layout = fx.layout_of(*elem_ty);
+                match self.inner {
+                    CPlaceInner::Addr(addr, None) => (elem_layout, addr),
+                    CPlaceInner::Var(_, _)
+                    | CPlaceInner::Addr(_, Some(_))
+                    | CPlaceInner::VarPair(_, _, _) => bug!("Can't index into {self:?}"),
+                }
+            }
+            ty::Slice(elem_ty) => (fx.layout_of(*elem_ty), self.to_ptr_unsized().0),
+            _ => bug!("place_index({:?})", self.layout().ty),
+        };
+
+        let offset = fx.bcx.ins().imul_imm(index, elem_layout.size.bytes() as i64);
+
+        CPlace::for_ptr(ptr.offset_value(fx, offset), elem_layout)
+    }
+
+    pub(crate) fn place_deref(self, fx: &mut FunctionCx<'_, '_, 'tcx>) -> CPlace<'tcx> {
+        let inner_layout = fx.layout_of(self.layout().ty.builtin_deref(true).unwrap());
+        if has_ptr_meta(fx.tcx, inner_layout.ty) {
+            let (addr, extra) = self.to_cvalue(fx).load_scalar_pair(fx);
+            CPlace::for_ptr_with_extra(Pointer::new(addr), extra, inner_layout)
+        } else {
+            CPlace::for_ptr(Pointer::new(self.to_cvalue(fx).load_scalar(fx)), inner_layout)
+        }
+    }
+
+    pub(crate) fn place_ref(
+        self,
+        fx: &mut FunctionCx<'_, '_, 'tcx>,
+        layout: TyAndLayout<'tcx>,
+    ) -> CValue<'tcx> {
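+        // Taking a reference to an unsized place must preserve the metadata (length or vtable).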
+        if has_ptr_meta(fx.tcx, self.layout().ty) {
+            let (ptr, extra) = self.to_ptr_unsized();
+            CValue::by_val_pair(ptr.get_addr(fx), extra, layout)
+        } else {
+            CValue::by_val(self.to_ptr().get_addr(fx), layout)
+        }
+    }
+
+    pub(crate) fn downcast_variant(
+        self,
+        fx: &FunctionCx<'_, '_, 'tcx>,
+        variant: VariantIdx,
+    ) -> Self {
+        assert!(self.layout().is_sized());
+        let layout = self.layout().for_variant(fx, variant);
+        CPlace { inner: self.inner, layout }
+    }
+}
+
+#[track_caller]
+pub(crate) fn assert_assignable<'tcx>(
+    fx: &FunctionCx<'_, '_, 'tcx>,
+    from_ty: Ty<'tcx>,
+    to_ty: Ty<'tcx>,
+    limit: usize,
+) {
+    if limit == 0 {
+        // `assert_assignable` exists solely to catch bugs in cg_clif. It isn't necessary for
+        // soundness. Don't attempt to check deep types, to avoid exponential behavior in certain
+        // cases.
+        return;
+    }
+    match (from_ty.kind(), to_ty.kind()) {
+        (ty::Ref(_, a, _), ty::Ref(_, b, _)) | (ty::RawPtr(a, _), ty::RawPtr(b, _)) => {
+            assert_assignable(fx, *a, *b, limit - 1);
+        }
+        (ty::Ref(_, a, _), ty::RawPtr(b, _)) | (ty::RawPtr(a, _), ty::Ref(_, b, _)) => {
+            assert_assignable(fx, *a, *b, limit - 1);
+        }
+        (ty::FnPtr(_), ty::FnPtr(_)) => {
+            let from_sig = fx.tcx.normalize_erasing_late_bound_regions(
+                ParamEnv::reveal_all(),
+                from_ty.fn_sig(fx.tcx),
+            );
+            let FnSig {
+                inputs_and_output: types_from,
+                c_variadic: c_variadic_from,
+                unsafety: unsafety_from,
+                abi: abi_from,
+            } = from_sig;
+            let to_sig = fx
+                .tcx
+                .normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), to_ty.fn_sig(fx.tcx));
+            let FnSig {
+                inputs_and_output: types_to,
+                c_variadic: c_variadic_to,
+                unsafety: unsafety_to,
+                abi: abi_to,
+            } = to_sig;
+            let mut types_from = types_from.iter();
+            let mut types_to = types_to.iter();
+            loop {
+                match (types_from.next(), types_to.next()) {
+                    (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+                    (None, None) => break,
+                    (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+                }
+            }
+            assert_eq!(
+                c_variadic_from, c_variadic_to,
+                "Can't write fn ptr with incompatible sig {:?} to place with sig {:?}\n\n{:#?}",
+                from_sig, to_sig, fx,
+            );
+            assert_eq!(
+                unsafety_from, unsafety_to,
+                "Can't write fn ptr with incompatible sig {:?} to place with sig {:?}\n\n{:#?}",
+                from_sig, to_sig, fx,
+            );
+            assert_eq!(
+                abi_from, abi_to,
+                "Can't write fn ptr with incompatible sig {:?} to place with sig {:?}\n\n{:#?}",
+                from_sig, to_sig, fx,
+            );
+            // fn(&T) -> for<'l> fn(&'l T) is allowed
+        }
+        (&ty::Dynamic(from_traits, _, _from_kind), &ty::Dynamic(to_traits, _, _to_kind)) => {
+            // FIXME(dyn-star): Do the right thing with DynKinds
+            for (from, to) in from_traits.iter().zip(to_traits) {
+                let from =
+                    fx.tcx.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), from);
+                let to = fx.tcx.normalize_erasing_late_bound_regions(ParamEnv::reveal_all(), to);
+                assert_eq!(
+                    from, to,
+                    "Can't write trait object of incompatible traits {:?} to place with traits {:?}\n\n{:#?}",
+                    from_traits, to_traits, fx,
+                );
+            }
+            // dyn for<'r> Trait<'r> -> dyn Trait<'_> is allowed
+        }
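+        // Each of the following arms walks the two types' arguments in lockstep; running out of
+        // arguments on one side only can never happen for assignable types, so it is reported as
+        // a bug.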
+        (&ty::Tuple(types_a), &ty::Tuple(types_b)) => {
+            let mut types_a = types_a.iter();
+            let mut types_b = types_b.iter();
+            loop {
+                match (types_a.next(), types_b.next()) {
+                    (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+                    (None, None) => return,
+                    (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+                }
+            }
+        }
+        (&ty::Adt(adt_def_a, args_a), &ty::Adt(adt_def_b, args_b))
+            if adt_def_a.did() == adt_def_b.did() =>
+        {
+            let mut types_a = args_a.types();
+            let mut types_b = args_b.types();
+            loop {
+                match (types_a.next(), types_b.next()) {
+                    (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+                    (None, None) => return,
+                    (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+                }
+            }
+        }
+        (ty::Array(a, _), ty::Array(b, _)) => assert_assignable(fx, *a, *b, limit - 1),
+        (&ty::Closure(def_id_a, args_a), &ty::Closure(def_id_b, args_b))
+            if def_id_a == def_id_b =>
+        {
+            let mut types_a = args_a.types();
+            let mut types_b = args_b.types();
+            loop {
+                match (types_a.next(), types_b.next()) {
+                    (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+                    (None, None) => return,
+                    (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+                }
+            }
+        }
+        (&ty::Coroutine(def_id_a, args_a), &ty::Coroutine(def_id_b, args_b))
+            if def_id_a == def_id_b =>
+        {
+            let mut types_a = args_a.types();
+            let mut types_b = args_b.types();
+            loop {
+                match (types_a.next(), types_b.next()) {
+                    (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+                    (None, None) => return,
+                    (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+                }
+            }
+        }
+        (&ty::CoroutineWitness(def_id_a, args_a), &ty::CoroutineWitness(def_id_b, args_b))
+            if def_id_a == def_id_b =>
+        {
+            let mut types_a = args_a.types();
+            let mut types_b = args_b.types();
+            loop {
+                match (types_a.next(), types_b.next()) {
+                    (Some(a), Some(b)) => assert_assignable(fx, a, b, limit - 1),
+                    (None, None) => return,
+                    (Some(_), None) | (None, Some(_)) => panic!("{:#?}/{:#?}", from_ty, to_ty),
+                }
+            }
+        }
+        (ty::Param(_), _) | (_, ty::Param(_)) if fx.tcx.sess.opts.unstable_opts.polymorphize => {
+            // No way to check if it is correct or not with polymorphization enabled
+        }
+        _ => {
+            assert_eq!(
+                from_ty,
+                to_ty,
+                "Can't write value with incompatible type {:?} to place with type {:?}\n\n{:#?}",
+                from_ty.kind(),
+                to_ty.kind(),
+                fx,
+            );
+        }
+    }
+}
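+
+// Illustrative (not part of the original source): callers are expected to invoke this right
+// before a store, along the lines of
+//
+//     assert_assignable(fx, from_value.layout().ty, to_place.layout().ty, 16);
+//
+// where `16` is an arbitrary recursion budget. A failure indicates a cg_clif bug, not unsound
+// user code.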
diff --git a/compiler/rustc_codegen_cranelift/src/vtable.rs b/compiler/rustc_codegen_cranelift/src/vtable.rs
new file mode 100644
index 00000000000..14c607ccad7
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/vtable.rs
@@ -0,0 +1,101 @@
+//! Codegen vtables and vtable accesses.
+//!
+//! See `rustc_codegen_ssa/src/meth.rs` for reference.
+
+use crate::constant::data_id_for_vtable;
+use crate::prelude::*;
+
+pub(crate) fn vtable_memflags() -> MemFlags {
+    let mut flags = MemFlags::trusted(); // A vtable access is always aligned and will never trap.
+    flags.set_readonly(); // A vtable is always read-only.
+    flags
+}
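+
+// Note: the first entries of every vtable form a common prefix shared by all traits: the
+// drop-in-place function pointer, then the size and the alignment of the concrete type. The
+// three loads below index into that prefix.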
+
+pub(crate) fn drop_fn_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
+    let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
+    fx.bcx.ins().load(
+        fx.pointer_type,
+        vtable_memflags(),
+        vtable,
+        (ty::COMMON_VTABLE_ENTRIES_DROPINPLACE * usize_size) as i32,
+    )
+}
+
+pub(crate) fn size_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
+    let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
+    fx.bcx.ins().load(
+        fx.pointer_type,
+        vtable_memflags(),
+        vtable,
+        (ty::COMMON_VTABLE_ENTRIES_SIZE * usize_size) as i32,
+    )
+}
+
+pub(crate) fn min_align_of_obj(fx: &mut FunctionCx<'_, '_, '_>, vtable: Value) -> Value {
+    let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes() as usize;
+    fx.bcx.ins().load(
+        fx.pointer_type,
+        vtable_memflags(),
+        vtable,
+        (ty::COMMON_VTABLE_ENTRIES_ALIGN * usize_size) as i32,
+    )
+}
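+
+// Illustrative sketch (not part of the original source): given the vtable half of a fat pointer,
+// the dynamic layout of the pointee can be recovered with the two helpers above:
+//
+//     let size = size_of_obj(fx, vtable);
+//     let align = min_align_of_obj(fx, vtable);
+//
+// Both are plain pointer-sized loads from the vtable's common prefix.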
+
+pub(crate) fn get_ptr_and_method_ref<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    mut arg: CValue<'tcx>,
+    idx: usize,
+) -> (Pointer, Value) {
+    let (ptr, vtable) = 'block: {
+        if let Abi::Scalar(_) = arg.layout().abi {
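+            // Peel off transparent wrappers (e.g. `Pin`) until the raw wide pointer is reached;
+            // `DispatchFromDyn` guarantees exactly one non-1-ZST field at each step.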
+            while !arg.layout().ty.is_unsafe_ptr() && !arg.layout().ty.is_ref() {
+                let (idx, _) = arg
+                    .layout()
+                    .non_1zst_field(fx)
+                    .expect("not exactly one non-1-ZST field in a `DispatchFromDyn` type");
+                arg = arg.value_field(fx, FieldIdx::new(idx));
+            }
+        }
+
+        if let ty::Ref(_, ty, _) = arg.layout().ty.kind() {
+            if ty.is_dyn_star() {
+                let inner_layout = fx.layout_of(arg.layout().ty.builtin_deref(true).unwrap());
+                let dyn_star = CPlace::for_ptr(Pointer::new(arg.load_scalar(fx)), inner_layout);
+                let ptr = dyn_star.place_field(fx, FieldIdx::ZERO).to_ptr();
+                let vtable =
+                    dyn_star.place_field(fx, FieldIdx::new(1)).to_cvalue(fx).load_scalar(fx);
+                break 'block (ptr, vtable);
+            }
+        }
+
+        if let Abi::ScalarPair(_, _) = arg.layout().abi {
+            let (ptr, vtable) = arg.load_scalar_pair(fx);
+            (Pointer::new(ptr), vtable)
+        } else {
+            let (ptr, vtable) = arg.try_to_ptr().unwrap();
+            (ptr, vtable.unwrap())
+        }
+    };
+
+    let usize_size = fx.layout_of(fx.tcx.types.usize).size.bytes();
+    let func_ref = fx.bcx.ins().load(
+        fx.pointer_type,
+        vtable_memflags(),
+        vtable,
+        (idx * usize_size as usize) as i32,
+    );
+    (ptr, func_ref)
+}
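+
+// Illustrative (not part of the original source): for a virtual call, the returned pair feeds an
+// indirect call, roughly
+//
+//     let (data_ptr, method) = get_ptr_and_method_ref(fx, receiver, idx);
+//     let call = fx.bcx.ins().call_indirect(sig_ref, method, &call_args);
+//
+// where `sig_ref`, `receiver`, `idx`, and `call_args` are assumed to be in scope.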
+
+pub(crate) fn get_vtable<'tcx>(
+    fx: &mut FunctionCx<'_, '_, 'tcx>,
+    ty: Ty<'tcx>,
+    trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
+) -> Value {
+    let data_id = data_id_for_vtable(fx.tcx, &mut fx.constants_cx, fx.module, ty, trait_ref);
+    let local_data_id = fx.module.declare_data_in_func(data_id, fx.bcx.func);
+    if fx.clif_comments.enabled() {
+        fx.add_comment(local_data_id, "vtable");
+    }
+    fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
+}
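+
+// Illustrative (not part of the original source): `get_vtable` materializes the metadata half of
+// an unsizing coercion such as `&Foo as &dyn Trait`; the returned `Value` is the address of the
+// statically allocated vtable data object for the `(Foo, Trait)` pair.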