Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
22 files changed, 994 insertions, 198 deletions
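Most of this patch teaches cg_clif to handle `f16` and `f128`: arithmetic and conversions become compiler-builtins libcalls, while sign operations become integer bit manipulation, because Cranelift backends cannot lower these types natively yet (bytecodealliance/wasmtime#8312). As a standalone illustration of what one of the most frequently emitted libcalls, `__extendhfsf2`, computes, here is a plain-Rust binary16-to-binary32 widening. This is illustrative only; the patch emits the libcall rather than open-coding it:

    // Widen IEEE 754 binary16 bits to an f32, covering zeros, subnormals,
    // normals, infinities and NaNs.
    fn f16_to_f32(bits: u16) -> f32 {
        let sign = (u32::from(bits) & 0x8000) << 16;
        let exp = (bits >> 10) & 0x1f;
        let frac = u32::from(bits & 0x3ff);
        let magnitude = match exp {
            0 if frac == 0 => 0, // signed zero
            0 => {
                // subnormal: renormalize into f32's wider exponent range
                let shift = frac.leading_zeros() - 21; // 1..=10
                ((113 - shift) << 23) | ((frac << (shift + 13)) & 0x7f_ffff)
            }
            0x1f => 0x7f80_0000 | (frac << 13), // infinity / NaN (payload kept)
            _ => ((u32::from(exp) + 112) << 23) | (frac << 13), // rebias 15 -> 127
        };
        f32::from_bits(sign | magnitude)
    }

For example, `f16_to_f32(0x3c00)` yields `1.0_f32`, and `f16_to_f32(0x7bff)` yields `65504.0`, the largest finite `f16`.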
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs index 5f631405a9a..5f7bf3821d7 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs @@ -20,6 +20,7 @@ use rustc_middle::ty::print::with_no_trimmed_paths; use rustc_session::Session; use rustc_span::source_map::Spanned; use rustc_target::callconv::{Conv, FnAbi, PassMode}; +use smallvec::SmallVec; use self::pass_mode::*; pub(crate) use self::returning::codegen_return; @@ -153,10 +154,7 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> { let ret = self.lib_call_unadjusted(name, params, returns, &args)[0]; - // FIXME(bytecodealliance/wasmtime#6104) use bitcast instead of store to get from i64x2 to i128 - let ret_ptr = self.create_stack_slot(16, 16); - ret_ptr.store(self, ret, MemFlags::trusted()); - Cow::Owned(vec![ret_ptr.load(self, types::I128, MemFlags::trusted())]) + Cow::Owned(vec![codegen_bitcast(self, types::I128, ret)]) } else if ret_single_i128 && self.tcx.sess.target.arch == "s390x" { // Return i128 using a return area pointer on s390x. let mut params = params; @@ -184,11 +182,9 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> { let sig = Signature { params, returns, call_conv: self.target_config.default_call_conv }; let func_id = self.module.declare_function(name, Linkage::Import, &sig).unwrap(); let func_ref = self.module.declare_func_in_func(func_id, &mut self.bcx.func); - if self.clif_comments.enabled() { - self.add_comment(func_ref, format!("{:?}", name)); - } let call_inst = self.bcx.ins().call(func_ref, args); if self.clif_comments.enabled() { + self.add_comment(func_ref, format!("{:?}", name)); self.add_comment(call_inst, format!("lib_call {}", name)); } let results = self.bcx.inst_results(call_inst); @@ -384,6 +380,7 @@ pub(crate) fn codegen_terminator_call<'tcx>( args: &[Spanned<Operand<'tcx>>], destination: Place<'tcx>, target: Option<BasicBlock>, + _unwind: UnwindAction, ) { let func = codegen_operand(fx, func); let fn_sig = func.layout().ty.fn_sig(fx.tcx); @@ -529,7 +526,7 @@ pub(crate) fn codegen_terminator_call<'tcx>( Some(Instance { def: InstanceKind::Virtual(_, idx), .. 
}) => { if fx.clif_comments.enabled() { let nop_inst = fx.bcx.ins().nop(); - fx.add_comment( + fx.add_post_comment( nop_inst, with_no_trimmed_paths!(format!( "virtual call; self arg pass mode: {:?}", @@ -555,7 +552,7 @@ pub(crate) fn codegen_terminator_call<'tcx>( None => { if fx.clif_comments.enabled() { let nop_inst = fx.bcx.ins().nop(); - fx.add_comment(nop_inst, "indirect call"); + fx.add_post_comment(nop_inst, "indirect call"); } let func = func.load_scalar(fx); @@ -585,17 +582,18 @@ pub(crate) fn codegen_terminator_call<'tcx>( adjust_call_for_c_variadic(fx, &fn_abi, source_info, func_ref, &mut call_args); } - if fx.clif_comments.enabled() { - let nop_inst = fx.bcx.ins().nop(); - with_no_trimmed_paths!(fx.add_comment(nop_inst, format!("abi: {:?}", fn_abi))); - } - - match func_ref { + let call_inst = match func_ref { CallTarget::Direct(func_ref) => fx.bcx.ins().call(func_ref, &call_args), CallTarget::Indirect(sig, func_ptr) => { fx.bcx.ins().call_indirect(sig, func_ptr, &call_args) } + }; + + if fx.clif_comments.enabled() { + with_no_trimmed_paths!(fx.add_comment(call_inst, format!("abi: {:?}", fn_abi))); } + + fx.bcx.func.dfg.inst_results(call_inst).iter().copied().collect::<SmallVec<[Value; 2]>>() }); if let Some(dest) = target { @@ -705,13 +703,16 @@ pub(crate) fn codegen_drop<'tcx>( source_info: mir::SourceInfo, drop_place: CPlace<'tcx>, target: BasicBlock, + _unwind: UnwindAction, ) { let ty = drop_place.layout().ty; let drop_instance = Instance::resolve_drop_in_place(fx.tcx, ty); + let ret_block = fx.get_block(target); // AsyncDropGlueCtorShim can't be here if let ty::InstanceKind::DropGlue(_, None) = drop_instance.def { // we don't actually need to drop anything + fx.bcx.ins().jump(ret_block, &[]); } else { match ty.kind() { ty::Dynamic(_, _, ty::Dyn) => { @@ -748,7 +749,9 @@ pub(crate) fn codegen_drop<'tcx>( let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi); let sig = fx.bcx.import_signature(sig); + // FIXME implement cleanup on exceptions fx.bcx.ins().call_indirect(sig, drop_fn, &[ptr]); + fx.bcx.ins().jump(ret_block, &[]); } ty::Dynamic(_, _, ty::DynStar) => { // IN THIS ARM, WE HAVE: @@ -792,6 +795,8 @@ pub(crate) fn codegen_drop<'tcx>( let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi); let sig = fx.bcx.import_signature(sig); fx.bcx.ins().call_indirect(sig, drop_fn, &[data]); + // FIXME implement cleanup on exceptions + fx.bcx.ins().jump(ret_block, &[]); } _ => { assert!(!matches!(drop_instance.def, InstanceKind::Virtual(_, _))); @@ -817,10 +822,37 @@ pub(crate) fn codegen_drop<'tcx>( let func_ref = fx.get_function_ref(drop_instance); fx.bcx.ins().call(func_ref, &call_args); + // FIXME implement cleanup on exceptions + fx.bcx.ins().jump(ret_block, &[]); } } } +} - let target_block = fx.get_block(target); - fx.bcx.ins().jump(target_block, &[]); +pub(crate) fn lib_call_arg_param(tcx: TyCtxt<'_>, ty: Type, is_signed: bool) -> AbiParam { + let param = AbiParam::new(ty); + if ty.is_int() && u64::from(ty.bits()) < tcx.data_layout.pointer_size.bits() { + match (&*tcx.sess.target.arch, &*tcx.sess.target.vendor) { + ("x86_64", _) | ("aarch64", "apple") => match (ty, is_signed) { + (types::I8 | types::I16, true) => param.sext(), + (types::I8 | types::I16, false) => param.uext(), + _ => param, + }, + ("aarch64", _) => param, + ("riscv64", _) => match (ty, is_signed) { + (types::I32, _) | (_, true) => param.sext(), + _ => param.uext(), + }, + ("s390x", _) => { + if is_signed { + param.sext() + } else { + param.uext() + 
} + } + _ => unimplemented!("{:?}", tcx.sess.target.arch), + } + } else { + param + } } diff --git a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs index 06d89bc9ea7..6d8614aca69 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs @@ -22,8 +22,10 @@ fn reg_to_abi_param(reg: Reg) -> AbiParam { (RegKind::Integer, 3..=4) => types::I32, (RegKind::Integer, 5..=8) => types::I64, (RegKind::Integer, 9..=16) => types::I128, + (RegKind::Float, 2) => types::F16, (RegKind::Float, 4) => types::F32, (RegKind::Float, 8) => types::F64, + (RegKind::Float, 16) => types::F128, (RegKind::Vector, size) => types::I8.by(u32::try_from(size).unwrap()).unwrap(), _ => unreachable!("{:?}", reg), }; diff --git a/compiler/rustc_codegen_cranelift/src/abi/returning.rs b/compiler/rustc_codegen_cranelift/src/abi/returning.rs index 9e048c7badb..36087f96dd7 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/returning.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/returning.rs @@ -46,7 +46,7 @@ pub(super) fn codegen_with_call_return_arg<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, ret_arg_abi: &ArgAbi<'tcx, Ty<'tcx>>, ret_place: CPlace<'tcx>, - f: impl FnOnce(&mut FunctionCx<'_, '_, 'tcx>, Option<Value>) -> Inst, + f: impl FnOnce(&mut FunctionCx<'_, '_, 'tcx>, Option<Value>) -> SmallVec<[Value; 2]>, ) { let (ret_temp_place, return_ptr) = match ret_arg_abi.mode { PassMode::Ignore => (None, None), @@ -67,23 +67,21 @@ pub(super) fn codegen_with_call_return_arg<'tcx>( PassMode::Direct(_) | PassMode::Pair(_, _) | PassMode::Cast { .. } => (None, None), }; - let call_inst = f(fx, return_ptr); + let results = f(fx, return_ptr); match ret_arg_abi.mode { PassMode::Ignore => {} PassMode::Direct(_) => { - let ret_val = fx.bcx.inst_results(call_inst)[0]; + let ret_val = results[0]; ret_place.write_cvalue(fx, CValue::by_val(ret_val, ret_arg_abi.layout)); } PassMode::Pair(_, _) => { - let ret_val_a = fx.bcx.inst_results(call_inst)[0]; - let ret_val_b = fx.bcx.inst_results(call_inst)[1]; + let ret_val_a = results[0]; + let ret_val_b = results[1]; ret_place .write_cvalue(fx, CValue::by_val_pair(ret_val_a, ret_val_b, ret_arg_abi.layout)); } PassMode::Cast { ref cast, .. 
} => { - let results = - fx.bcx.inst_results(call_inst).iter().copied().collect::<SmallVec<[Value; 2]>>(); let result = super::pass_mode::from_casted_value(fx, &results, ret_place.layout(), cast); ret_place.write_cvalue(fx, result); diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs index 524e0d9fe35..4617304105a 100644 --- a/compiler/rustc_codegen_cranelift/src/base.rs +++ b/compiler/rustc_codegen_cranelift/src/base.rs @@ -15,9 +15,9 @@ use rustc_middle::ty::print::with_no_trimmed_paths; use crate::constant::ConstantCx; use crate::debuginfo::{FunctionDebugContext, TypeDebugContext}; -use crate::enable_verifier; use crate::prelude::*; use crate::pretty_clif::CommentWriter; +use crate::{codegen_f16_f128, enable_verifier}; pub(crate) struct CodegenedFunction { symbol_name: String, @@ -193,6 +193,18 @@ pub(crate) fn compile_fn( name = codegened_func.symbol_name )); } + Err(ModuleError::Compilation(CodegenError::Verifier(err))) => { + let early_dcx = rustc_session::EarlyDiagCtxt::new( + rustc_session::config::ErrorOutputType::default(), + ); + let _ = early_dcx.early_err(format!("{:?}", err)); + let pretty_error = cranelift_codegen::print_errors::pretty_verifier_error( + &context.func, + Some(Box::new(&clif_comments)), + err, + ); + early_dcx.early_fatal(format!("cranelift verify error:\n{}", pretty_error)); + } Err(err) => { panic!("Error while defining {name}: {err:?}", name = codegened_func.symbol_name); } @@ -303,7 +315,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { bb_data.terminator().kind.fmt_head(&mut terminator_head).unwrap(); }); let inst = fx.bcx.func.layout.last_inst(block).unwrap(); - fx.add_comment(inst, terminator_head); + fx.add_post_comment(inst, terminator_head); } let source_info = bb_data.terminator().source_info; @@ -337,7 +349,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { TerminatorKind::Return => { crate::abi::codegen_return(fx); } - TerminatorKind::Assert { cond, expected, msg, target, unwind: _ } => { + TerminatorKind::Assert { cond, expected, msg, target, unwind } => { if !fx.tcx.sess.overflow_checks() && msg.is_optional_overflow_check() { let target = fx.get_block(*target); fx.bcx.ins().jump(target, &[]); @@ -367,6 +379,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { fx, rustc_hir::LangItem::PanicBoundsCheck, &[index, len, location], + *unwind, Some(source_info.span), ); } @@ -379,6 +392,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { fx, rustc_hir::LangItem::PanicMisalignedPointerDereference, &[required, found, location], + *unwind, Some(source_info.span), ); } @@ -389,6 +403,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { fx, rustc_hir::LangItem::PanicNullPointerDereference, &[location], + *unwind, Some(source_info.span), ) } @@ -399,6 +414,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { fx, msg.panic_function(), &[location], + *unwind, Some(source_info.span), ); } @@ -457,7 +473,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { destination, target, fn_span, - unwind: _, + unwind, call_source: _, } => { fx.tcx.prof.generic_activity("codegen call").run(|| { @@ -468,6 +484,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { args, *destination, *target, + *unwind, ) }); } @@ -514,7 +531,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { ); } 
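// Hedged standalone sketch (not part of the patch) of the per-target rules
// that the new `lib_call_arg_param` helper at the end of abi/mod.rs above
// applies to integer libcall arguments narrower than a pointer; `Ext` is a
// hypothetical stand-in for `AbiParam::sext`/`AbiParam::uext`.
enum Ext { None, Sign, Zero }
fn small_int_ext(arch: &str, vendor: &str, ty_bits: u32, is_signed: bool) -> Ext {
    match (arch, vendor) {
        // x86_64 and Apple aarch64 extend i8/i16 according to signedness
        ("x86_64", _) | ("aarch64", "apple") if ty_bits < 32 => {
            if is_signed { Ext::Sign } else { Ext::Zero }
        }
        // all other aarch64 ABIs pass small integers unchanged
        ("aarch64", _) => Ext::None,
        // riscv64 sign-extends i32 and anything signed, zero-extends the rest
        ("riscv64", _) => if ty_bits == 32 || is_signed { Ext::Sign } else { Ext::Zero },
        // s390x always extends according to signedness
        ("s390x", _) => if is_signed { Ext::Sign } else { Ext::Zero },
        // the real helper hits unimplemented!() for other architectures
        _ => Ext::None,
    }
}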
TerminatorKind::UnwindTerminate(reason) => { - codegen_unwind_terminate(fx, source_info, *reason); + codegen_unwind_terminate(fx, Some(source_info.span), *reason); } TerminatorKind::UnwindResume => { // FIXME implement unwinding @@ -530,23 +547,19 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { | TerminatorKind::CoroutineDrop => { bug!("shouldn't exist at codegen {:?}", bb_data.terminator()); } - TerminatorKind::Drop { place, target, unwind: _, replace: _, drop, async_fut } => { + TerminatorKind::Drop { place, target, unwind, replace: _, drop, async_fut } => { assert!( async_fut.is_none() && drop.is_none(), "Async Drop must be expanded or reset to sync before codegen" ); let drop_place = codegen_place(fx, *place); - crate::abi::codegen_drop(fx, source_info, drop_place, *target); + crate::abi::codegen_drop(fx, source_info, drop_place, *target, *unwind); } }; } } -fn codegen_stmt<'tcx>( - fx: &mut FunctionCx<'_, '_, 'tcx>, - #[allow(unused_variables)] cur_block: Block, - stmt: &Statement<'tcx>, -) { +fn codegen_stmt<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, cur_block: Block, stmt: &Statement<'tcx>) { let _print_guard = crate::PrintOnPanic(|| format!("stmt {:?}", stmt)); fx.set_debug_loc(stmt.source_info); @@ -557,7 +570,7 @@ fn codegen_stmt<'tcx>( if fx.clif_comments.enabled() { let inst = fx.bcx.func.layout.last_inst(cur_block).unwrap(); with_no_trimmed_paths!({ - fx.add_comment(inst, format!("{:?}", stmt)); + fx.add_post_comment(inst, format!("{:?}", stmt)); }); } } @@ -622,6 +635,15 @@ fn codegen_stmt<'tcx>( let val = operand.load_scalar(fx); match layout.ty.kind() { ty::Int(_) => CValue::by_val(fx.bcx.ins().ineg(val), layout), + // FIXME(bytecodealliance/wasmtime#8312): Remove + // once backend lowerings have been added to + // Cranelift. 
+ ty::Float(FloatTy::F16) => { + CValue::by_val(codegen_f16_f128::neg_f16(fx, val), layout) + } + ty::Float(FloatTy::F128) => { + CValue::by_val(codegen_f16_f128::neg_f128(fx, val), layout) + } ty::Float(_) => CValue::by_val(fx.bcx.ins().fneg(val), layout), _ => unreachable!("un op Neg for {:?}", layout.ty), } @@ -793,7 +815,7 @@ fn codegen_stmt<'tcx>( let done_block = fx.bcx.create_block(); let index = fx.bcx.append_block_param(loop_block, fx.pointer_type); let zero = fx.bcx.ins().iconst(fx.pointer_type, 0); - fx.bcx.ins().jump(loop_block, &[zero]); + fx.bcx.ins().jump(loop_block, &[zero.into()]); fx.bcx.switch_to_block(loop_block); let done = fx.bcx.ins().icmp_imm(IntCC::Equal, index, times as i64); @@ -803,7 +825,7 @@ fn codegen_stmt<'tcx>( let to = lval.place_index(fx, index); to.write_cvalue(fx, operand); let index = fx.bcx.ins().iadd_imm(index, 1); - fx.bcx.ins().jump(loop_block, &[index]); + fx.bcx.ins().jump(loop_block, &[index.into()]); fx.bcx.switch_to_block(done_block); fx.bcx.ins().nop(); @@ -1058,23 +1080,28 @@ pub(crate) fn codegen_panic_nounwind<'tcx>( let msg_len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(msg_str.len()).unwrap()); let args = [msg_ptr, msg_len]; - codegen_panic_inner(fx, rustc_hir::LangItem::PanicNounwind, &args, span); + codegen_panic_inner( + fx, + rustc_hir::LangItem::PanicNounwind, + &args, + UnwindAction::Terminate(UnwindTerminateReason::Abi), + span, + ); } pub(crate) fn codegen_unwind_terminate<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, - source_info: mir::SourceInfo, + span: Option<Span>, reason: UnwindTerminateReason, ) { - let args = []; - - codegen_panic_inner(fx, reason.lang_item(), &args, Some(source_info.span)); + codegen_panic_inner(fx, reason.lang_item(), &[], UnwindAction::Unreachable, span); } fn codegen_panic_inner<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, lang_item: rustc_hir::LangItem, args: &[Value], + _unwind: UnwindAction, span: Option<Span>, ) { fx.bcx.set_cold_block(fx.bcx.current_block().unwrap()); @@ -1090,6 +1117,8 @@ fn codegen_panic_inner<'tcx>( let symbol_name = fx.tcx.symbol_name(instance).name; + // FIXME implement cleanup on exceptions + fx.lib_call( symbol_name, args.iter().map(|&arg| AbiParam::new(fx.bcx.func.dfg.value_type(arg))).collect(), diff --git a/compiler/rustc_codegen_cranelift/src/cast.rs b/compiler/rustc_codegen_cranelift/src/cast.rs index e2346324232..8a725680e70 100644 --- a/compiler/rustc_codegen_cranelift/src/cast.rs +++ b/compiler/rustc_codegen_cranelift/src/cast.rs @@ -1,5 +1,6 @@ //! Various number casting functions +use crate::codegen_f16_f128; use crate::prelude::*; pub(crate) fn clif_intcast( @@ -36,6 +37,14 @@ pub(crate) fn clif_int_or_float_cast( ) -> Value { let from_ty = fx.bcx.func.dfg.value_type(from); + // FIXME(bytecodealliance/wasmtime#8312): Remove in favour of native + // Cranelift operations once Cranelift backends have lowerings for them. 
+ if matches!(from_ty, types::F16 | types::F128) + || matches!(to_ty, types::F16 | types::F128) && from_ty != to_ty + { + return codegen_f16_f128::codegen_cast(fx, from, from_signed, to_ty, to_signed); + } + if from_ty.is_int() && to_ty.is_int() { // int-like -> int-like clif_intcast( @@ -58,8 +67,10 @@ pub(crate) fn clif_int_or_float_cast( "__float{sign}ti{flt}f", sign = if from_signed { "" } else { "un" }, flt = match to_ty { + types::F16 => "h", types::F32 => "s", types::F64 => "d", + types::F128 => "t", _ => unreachable!("{:?}", to_ty), }, ); @@ -90,8 +101,10 @@ pub(crate) fn clif_int_or_float_cast( "__fix{sign}{flt}fti", sign = if to_signed { "" } else { "uns" }, flt = match from_ty { + types::F16 => "h", types::F32 => "s", types::F64 => "d", + types::F128 => "t", _ => unreachable!("{:?}", to_ty), }, ); @@ -145,8 +158,12 @@ pub(crate) fn clif_int_or_float_cast( } else if from_ty.is_float() && to_ty.is_float() { // float -> float match (from_ty, to_ty) { - (types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from), - (types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from), + (types::F16, types::F32 | types::F64 | types::F128) + | (types::F32, types::F64 | types::F128) + | (types::F64, types::F128) => fx.bcx.ins().fpromote(to_ty, from), + (types::F128, types::F64 | types::F32 | types::F16) + | (types::F64, types::F32 | types::F16) + | (types::F32, types::F16) => fx.bcx.ins().fdemote(to_ty, from), _ => from, } } else { diff --git a/compiler/rustc_codegen_cranelift/src/codegen_f16_f128.rs b/compiler/rustc_codegen_cranelift/src/codegen_f16_f128.rs new file mode 100644 index 00000000000..1e202be1f18 --- /dev/null +++ b/compiler/rustc_codegen_cranelift/src/codegen_f16_f128.rs @@ -0,0 +1,284 @@ +use crate::prelude::*; + +pub(crate) fn f16_to_f32(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let (value, arg_ty) = + if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { + ( + fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value), + lib_call_arg_param(fx.tcx, types::I16, false), + ) + } else { + (value, AbiParam::new(types::F16)) + }; + fx.lib_call("__extendhfsf2", vec![arg_ty], vec![AbiParam::new(types::F32)], &[value])[0] +} + +fn f16_to_f64(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let ret = f16_to_f32(fx, value); + fx.bcx.ins().fpromote(types::F64, ret) +} + +pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { + types::I16 + } else { + types::F16 + }; + let ret = fx.lib_call( + "__truncsfhf2", + vec![AbiParam::new(types::F32)], + vec![AbiParam::new(ret_ty)], + &[value], + )[0]; + if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } +} + +fn f64_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { + types::I16 + } else { + types::F16 + }; + let ret = fx.lib_call( + "__truncdfhf2", + vec![AbiParam::new(types::F64)], + vec![AbiParam::new(ret_ty)], + &[value], + )[0]; + if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } +} + +pub(crate) fn fcmp(fx: &mut FunctionCx<'_, '_, '_>, cc: FloatCC, lhs: Value, rhs: Value) -> Value { + let ty = fx.bcx.func.dfg.value_type(lhs); + match ty { + types::F32 | types::F64 => fx.bcx.ins().fcmp(cc, lhs, rhs), + types::F16 => { + let lhs = f16_to_f32(fx, lhs); + let 
rhs = f16_to_f32(fx, rhs); + fx.bcx.ins().fcmp(cc, lhs, rhs) + } + types::F128 => { + let (name, int_cc) = match cc { + FloatCC::Equal => ("__eqtf2", IntCC::Equal), + FloatCC::NotEqual => ("__netf2", IntCC::NotEqual), + FloatCC::LessThan => ("__lttf2", IntCC::SignedLessThan), + FloatCC::LessThanOrEqual => ("__letf2", IntCC::SignedLessThanOrEqual), + FloatCC::GreaterThan => ("__gttf2", IntCC::SignedGreaterThan), + FloatCC::GreaterThanOrEqual => ("__getf2", IntCC::SignedGreaterThanOrEqual), + _ => unreachable!("not currently used in rustc_codegen_cranelift: {cc:?}"), + }; + let res = fx.lib_call( + name, + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + // FIXME(rust-lang/compiler-builtins#919): This should be `I64` on non-AArch64 + // architectures, but switching it before compiler-builtins is fixed causes test + // failures. + vec![AbiParam::new(types::I32)], + &[lhs, rhs], + )[0]; + let zero = fx.bcx.ins().iconst(types::I32, 0); + let res = fx.bcx.ins().icmp(int_cc, res, zero); + res + } + _ => unreachable!("{ty:?}"), + } +} + +pub(crate) fn codegen_f128_binop( + fx: &mut FunctionCx<'_, '_, '_>, + bin_op: BinOp, + lhs: Value, + rhs: Value, +) -> Value { + let name = match bin_op { + BinOp::Add => "__addtf3", + BinOp::Sub => "__subtf3", + BinOp::Mul => "__multf3", + BinOp::Div => "__divtf3", + _ => unreachable!("handled in `codegen_float_binop`"), + }; + fx.lib_call( + name, + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + vec![AbiParam::new(types::F128)], + &[lhs, rhs], + )[0] +} + +pub(crate) fn neg_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let bits = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value); + let bits = fx.bcx.ins().bxor_imm(bits, 0x8000); + fx.bcx.ins().bitcast(types::F16, MemFlags::new(), bits) +} + +pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let bits = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), value); + let (low, high) = fx.bcx.ins().isplit(bits); + let high = fx.bcx.ins().bxor_imm(high, 0x8000_0000_0000_0000_u64 as i64); + let bits = fx.bcx.ins().iconcat(low, high); + fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) +} + +pub(crate) fn abs_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let bits = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value); + let bits = fx.bcx.ins().band_imm(bits, 0x7fff); + fx.bcx.ins().bitcast(types::F16, MemFlags::new(), bits) +} + +pub(crate) fn abs_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let bits = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), value); + let (low, high) = fx.bcx.ins().isplit(bits); + let high = fx.bcx.ins().band_imm(high, 0x7fff_ffff_ffff_ffff_u64 as i64); + let bits = fx.bcx.ins().iconcat(low, high); + fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) +} + +pub(crate) fn copysign_f16(fx: &mut FunctionCx<'_, '_, '_>, lhs: Value, rhs: Value) -> Value { + let lhs = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), lhs); + let rhs = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), rhs); + let res = fx.bcx.ins().band_imm(lhs, 0x7fff); + let sign = fx.bcx.ins().band_imm(rhs, 0x8000); + let res = fx.bcx.ins().bor(res, sign); + fx.bcx.ins().bitcast(types::F16, MemFlags::new(), res) +} + +pub(crate) fn copysign_f128(fx: &mut FunctionCx<'_, '_, '_>, lhs: Value, rhs: Value) -> Value { + let lhs = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), lhs); + let rhs = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), rhs); + let (low, lhs_high) = 
fx.bcx.ins().isplit(lhs); + let (_, rhs_high) = fx.bcx.ins().isplit(rhs); + let high = fx.bcx.ins().band_imm(lhs_high, 0x7fff_ffff_ffff_ffff_u64 as i64); + let sign = fx.bcx.ins().band_imm(rhs_high, 0x8000_0000_0000_0000_u64 as i64); + let high = fx.bcx.ins().bor(high, sign); + let res = fx.bcx.ins().iconcat(low, high); + fx.bcx.ins().bitcast(types::F128, MemFlags::new(), res) +} + +pub(crate) fn codegen_cast( + fx: &mut FunctionCx<'_, '_, '_>, + from: Value, + from_signed: bool, + to_ty: Type, + to_signed: bool, +) -> Value { + let from_ty = fx.bcx.func.dfg.value_type(from); + if from_ty.is_float() && to_ty.is_float() { + let name = match (from_ty, to_ty) { + (types::F16, types::F32) => return f16_to_f32(fx, from), + (types::F16, types::F64) => return f16_to_f64(fx, from), + (types::F16, types::F128) => "__extendhftf2", + (types::F32, types::F128) => "__extendsftf2", + (types::F64, types::F128) => "__extenddftf2", + (types::F128, types::F64) => "__trunctfdf2", + (types::F128, types::F32) => "__trunctfsf2", + (types::F128, types::F16) => "__trunctfhf2", + (types::F64, types::F16) => return f64_to_f16(fx, from), + (types::F32, types::F16) => return f32_to_f16(fx, from), + _ => unreachable!("{from_ty:?} -> {to_ty:?}"), + }; + fx.lib_call(name, vec![AbiParam::new(from_ty)], vec![AbiParam::new(to_ty)], &[from])[0] + } else if from_ty.is_int() && to_ty == types::F16 { + let res = clif_int_or_float_cast(fx, from, from_signed, types::F32, false); + f32_to_f16(fx, res) + } else if from_ty == types::F16 && to_ty.is_int() { + let from = f16_to_f32(fx, from); + clif_int_or_float_cast(fx, from, false, to_ty, to_signed) + } else if from_ty.is_int() && to_ty == types::F128 { + let (from, from_ty) = if from_ty.bits() < 32 { + (clif_int_or_float_cast(fx, from, from_signed, types::I32, from_signed), types::I32) + } else { + (from, from_ty) + }; + let name = format!( + "__float{sign}{size}itf", + sign = if from_signed { "" } else { "un" }, + size = match from_ty { + types::I32 => 's', + types::I64 => 'd', + types::I128 => 't', + _ => unreachable!("{from_ty:?}"), + }, + ); + fx.lib_call( + &name, + vec![lib_call_arg_param(fx.tcx, from_ty, from_signed)], + vec![AbiParam::new(to_ty)], + &[from], + )[0] + } else if from_ty == types::F128 && to_ty.is_int() { + let ret_ty = if to_ty.bits() < 32 { types::I32 } else { to_ty }; + let name = format!( + "__fix{sign}tf{size}i", + sign = if from_signed { "" } else { "un" }, + size = match ret_ty { + types::I32 => 's', + types::I64 => 'd', + types::I128 => 't', + _ => unreachable!("{from_ty:?}"), + }, + ); + // Declare the libcall's return as the widened `ret_ty`; the clamp and + // `ireduce` below rely on receiving the full i32 result. + let ret = + fx.lib_call(&name, vec![AbiParam::new(from_ty)], vec![AbiParam::new(ret_ty)], &[from]) + [0]; + let val = if ret_ty == to_ty { + ret + } else { + let (min, max) = match (to_ty, to_signed) { + (types::I8, false) => (0, i64::from(u8::MAX)), + (types::I16, false) => (0, i64::from(u16::MAX)), + (types::I8, true) => (i64::from(i8::MIN as u32), i64::from(i8::MAX as u32)), + (types::I16, true) => (i64::from(i16::MIN as u32), i64::from(i16::MAX as u32)), + _ => unreachable!("{to_ty:?}"), + }; + let min_val = fx.bcx.ins().iconst(types::I32, min); + let max_val = fx.bcx.ins().iconst(types::I32, max); + + let val = if to_signed { + let has_underflow = fx.bcx.ins().icmp_imm(IntCC::SignedLessThan, ret, min); + let has_overflow = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThan, ret, max); + let bottom_capped = fx.bcx.ins().select(has_underflow, min_val, ret); + fx.bcx.ins().select(has_overflow, max_val, bottom_capped) + } else { + let has_overflow =
fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThan, ret, max); + fx.bcx.ins().select(has_overflow, max_val, ret) + }; + fx.bcx.ins().ireduce(to_ty, val) + }; + + if let Some(false) = fx.tcx.sess.opts.unstable_opts.saturating_float_casts { + return val; + } + + let is_not_nan = fcmp(fx, FloatCC::Equal, from, from); + let zero = type_zero_value(&mut fx.bcx, to_ty); + fx.bcx.ins().select(is_not_nan, val, zero) + } else { + unreachable!("{from_ty:?} -> {to_ty:?}"); + } +} + +pub(crate) fn fma_f16(fx: &mut FunctionCx<'_, '_, '_>, x: Value, y: Value, z: Value) -> Value { + let x = f16_to_f64(fx, x); + let y = f16_to_f64(fx, y); + let z = f16_to_f64(fx, z); + let res = fx.bcx.ins().fma(x, y, z); + f64_to_f16(fx, res) +} + +pub(crate) fn fmin_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { + fx.lib_call( + "fminimumf128", + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + vec![AbiParam::new(types::F128)], + &[a, b], + )[0] +} + +pub(crate) fn fmax_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { + fx.lib_call( + "fmaximumf128", + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + vec![AbiParam::new(types::F128)], + &[a, b], + )[0] +} diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs index abe2972ba0c..2f11b2d2dcc 100644 --- a/compiler/rustc_codegen_cranelift/src/common.rs +++ b/compiler/rustc_codegen_cranelift/src/common.rs @@ -33,10 +33,10 @@ pub(crate) fn scalar_to_clif_type(tcx: TyCtxt<'_>, scalar: Scalar) -> Type { Integer::I128 => types::I128, }, Primitive::Float(float) => match float { - Float::F16 => unimplemented!("f16_f128"), + Float::F16 => types::F16, Float::F32 => types::F32, Float::F64 => types::F64, - Float::F128 => unimplemented!("f16_f128"), + Float::F128 => types::F128, }, // FIXME(erikdesjardins): handle non-default addrspace ptr sizes Primitive::Pointer(_) => pointer_ty(tcx), @@ -64,10 +64,10 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<types::Typ }, ty::Char => types::I32, ty::Float(size) => match size { - FloatTy::F16 => unimplemented!("f16_f128"), + FloatTy::F16 => types::F16, FloatTy::F32 => types::F32, FloatTy::F64 => types::F64, - FloatTy::F128 => unimplemented!("f16_f128"), + FloatTy::F128 => types::F128, }, ty::FnPtr(..) => pointer_ty(tcx), ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => { diff --git a/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs b/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs index bf16e81a06f..6eea19211fa 100644 --- a/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs +++ b/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs @@ -46,15 +46,100 @@ builtin_functions! 
{ fn __rust_u128_mulo(a: u128, b: u128, oflow: &mut i32) -> u128; fn __rust_i128_mulo(a: i128, b: i128, oflow: &mut i32) -> i128; - // floats + // integer -> float fn __floattisf(i: i128) -> f32; fn __floattidf(i: i128) -> f64; + fn __floatsitf(i: i32) -> f128; + fn __floatditf(i: i64) -> f128; + fn __floattitf(i: i128) -> f128; fn __floatuntisf(i: u128) -> f32; fn __floatuntidf(i: u128) -> f64; + fn __floatunsitf(i: u32) -> f128; + fn __floatunditf(i: u64) -> f128; + fn __floatuntitf(i: u128) -> f128; + // float -> integer fn __fixsfti(f: f32) -> i128; fn __fixdfti(f: f64) -> i128; + fn __fixtfsi(f: f128) -> i32; + fn __fixtfdi(f: f128) -> i64; + fn __fixtfti(f: f128) -> i128; fn __fixunssfti(f: f32) -> u128; fn __fixunsdfti(f: f64) -> u128; + fn __fixunstfsi(f: f128) -> u32; + fn __fixunstfdi(f: f128) -> u64; + fn __fixunstfti(f: f128) -> u128; + // float -> float + fn __extendhfsf2(f: f16) -> f32; + fn __extendhftf2(f: f16) -> f128; + fn __extendsftf2(f: f32) -> f128; + fn __extenddftf2(f: f64) -> f128; + fn __trunctfdf2(f: f128) -> f64; + fn __trunctfsf2(f: f128) -> f32; + fn __trunctfhf2(f: f128) -> f16; + fn __truncdfhf2(f: f64) -> f16; + fn __truncsfhf2(f: f32) -> f16; + // float binops + fn __addtf3(a: f128, b: f128) -> f128; + fn __subtf3(a: f128, b: f128) -> f128; + fn __multf3(a: f128, b: f128) -> f128; + fn __divtf3(a: f128, b: f128) -> f128; + fn fmodf(a: f32, b: f32) -> f32; + fn fmod(a: f64, b: f64) -> f64; + fn fmodf128(a: f128, b: f128) -> f128; + // float comparison + fn __eqtf2(a: f128, b: f128) -> i32; + fn __netf2(a: f128, b: f128) -> i32; + fn __lttf2(a: f128, b: f128) -> i32; + fn __letf2(a: f128, b: f128) -> i32; + fn __gttf2(a: f128, b: f128) -> i32; + fn __getf2(a: f128, b: f128) -> i32; + fn fminimumf128(a: f128, b: f128) -> f128; + fn fmaximumf128(a: f128, b: f128) -> f128; + // Cranelift float libcalls + fn fmaf(a: f32, b: f32, c: f32) -> f32; + fn fma(a: f64, b: f64, c: f64) -> f64; + fn floorf(f: f32) -> f32; + fn floor(f: f64) -> f64; + fn ceilf(f: f32) -> f32; + fn ceil(f: f64) -> f64; + fn truncf(f: f32) -> f32; + fn trunc(f: f64) -> f64; + fn nearbyintf(f: f32) -> f32; + fn nearbyint(f: f64) -> f64; + // float intrinsics + fn __powisf2(a: f32, b: i32) -> f32; + fn __powidf2(a: f64, b: i32) -> f64; + // FIXME(f16_f128): `compiler-builtins` doesn't currently support `__powitf2` on MSVC. + // fn __powitf2(a: f128, b: i32) -> f128; + fn powf(a: f32, b: f32) -> f32; + fn pow(a: f64, b: f64) -> f64; + fn expf(f: f32) -> f32; + fn exp(f: f64) -> f64; + fn exp2f(f: f32) -> f32; + fn exp2(f: f64) -> f64; + fn logf(f: f32) -> f32; + fn log(f: f64) -> f64; + fn log2f(f: f32) -> f32; + fn log2(f: f64) -> f64; + fn log10f(f: f32) -> f32; + fn log10(f: f64) -> f64; + fn sinf(f: f32) -> f32; + fn sin(f: f64) -> f64; + fn cosf(f: f32) -> f32; + fn cos(f: f64) -> f64; + fn fmaf128(a: f128, b: f128, c: f128) -> f128; + fn floorf16(f: f16) -> f16; + fn floorf128(f: f128) -> f128; + fn ceilf16(f: f16) -> f16; + fn ceilf128(f: f128) -> f128; + fn truncf16(f: f16) -> f16; + fn truncf128(f: f128) -> f128; + fn rintf16(f: f16) -> f16; + fn rintf128(f: f128) -> f128; + fn sqrtf16(f: f16) -> f16; + fn sqrtf128(f: f128) -> f128; + // FIXME(f16_f128): Add other float intrinsics as compiler-builtins gains support (meaning they + // are available on all targets). // allocator // NOTE: These need to be mentioned here despite not being part of compiler_builtins because @@ -67,5 +152,4 @@ builtin_functions! 
{ fn malloc(size: size_t) -> *mut c_void; fn realloc(p: *mut c_void, size: size_t) -> *mut c_void; fn free(p: *mut c_void) -> (); - } diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs index ccdc347af66..0f4696b9337 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs @@ -81,13 +81,36 @@ impl WriterRelocate { /// Perform the collected relocations to be usable for JIT usage. #[cfg(all(feature = "jit", not(windows)))] pub(super) fn relocate_for_jit(mut self, jit_module: &cranelift_jit::JITModule) -> Vec<u8> { + use cranelift_module::Module; + for reloc in self.relocs.drain(..) { match reloc.name { super::DebugRelocName::Section(_) => unreachable!(), super::DebugRelocName::Symbol(sym) => { - let addr = jit_module.get_finalized_function( - cranelift_module::FuncId::from_u32(sym.try_into().unwrap()), - ); + let addr = if sym & 1 << 31 == 0 { + let func_id = FuncId::from_u32(sym.try_into().unwrap()); + // FIXME make JITModule::get_address public and use it here instead. + // HACK rust_eh_personality is likely not defined in the same crate, + // so get_finalized_function won't work. Use the rust_eh_personality + // of cg_clif itself, which is likely ABI compatible. + if jit_module.declarations().get_function_decl(func_id).name.as_deref() + == Some("rust_eh_personality") + { + extern "C" { + fn rust_eh_personality() -> !; + } + rust_eh_personality as *const u8 + } else { + jit_module.get_finalized_function(func_id) + } + } else { + jit_module + .get_finalized_data(DataId::from_u32( + u32::try_from(sym).unwrap() & !(1 << 31), + )) + .0 + }; + let val = (addr as u64 as i64 + reloc.addend) as u64; self.writer.write_udata_at(reloc.offset as usize, val, reloc.size).unwrap(); } @@ -196,6 +219,16 @@ impl Writer for WriterRelocate { }); self.write_udata(0, size) } + gimli::DW_EH_PE_absptr => { + self.relocs.push(DebugReloc { + offset: self.len() as u32, + size: size.into(), + name: DebugRelocName::Symbol(symbol), + addend, + kind: object::RelocationKind::Absolute, + }); + self.write_udata(0, size.into()) + } _ => Err(gimli::write::Error::UnsupportedPointerEncoding(eh_pe)), }, } diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs index 362333d35a4..74b82a7139a 100644 --- a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs +++ b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs @@ -1,7 +1,6 @@ //! 
Unwind info generation (`.eh_frame`) use cranelift_codegen::ir::Endianness; -use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::isa::unwind::UnwindInfo; use cranelift_object::ObjectProduct; use gimli::RunTimeEndian; @@ -18,14 +17,14 @@ pub(crate) struct UnwindContext { } impl UnwindContext { - pub(crate) fn new(isa: &dyn TargetIsa, pic_eh_frame: bool) -> Self { - let endian = match isa.endianness() { + pub(crate) fn new(module: &mut dyn Module, pic_eh_frame: bool) -> Self { + let endian = match module.isa().endianness() { Endianness::Little => RunTimeEndian::Little, Endianness::Big => RunTimeEndian::Big, }; let mut frame_table = FrameTable::default(); - let cie_id = if let Some(mut cie) = isa.create_systemv_cie() { + let cie_id = if let Some(mut cie) = module.isa().create_systemv_cie() { if pic_eh_frame { cie.fde_address_encoding = gimli::DwEhPe(gimli::DW_EH_PE_pcrel.0 | gimli::DW_EH_PE_sdata4.0); @@ -38,8 +37,15 @@ impl UnwindContext { UnwindContext { endian, frame_table, cie_id } } - pub(crate) fn add_function(&mut self, func_id: FuncId, context: &Context, isa: &dyn TargetIsa) { - if let target_lexicon::OperatingSystem::MacOSX { .. } = isa.triple().operating_system { + pub(crate) fn add_function( + &mut self, + module: &mut dyn Module, + func_id: FuncId, + context: &Context, + ) { + if let target_lexicon::OperatingSystem::MacOSX { .. } = + module.isa().triple().operating_system + { // The object crate doesn't currently support DW_GNU_EH_PE_absptr, which macOS // requires for unwinding tables. In addition on arm64 it currently doesn't // support 32bit relocations as we currently use for the unwinding table. @@ -48,7 +54,7 @@ impl UnwindContext { } let unwind_info = if let Some(unwind_info) = - context.compiled_code().unwrap().create_unwind_info(isa).unwrap() + context.compiled_code().unwrap().create_unwind_info(module.isa()).unwrap() { unwind_info } else { diff --git a/compiler/rustc_codegen_cranelift/src/driver/jit.rs b/compiler/rustc_codegen_cranelift/src/driver/jit.rs index e368cf4386d..b1f185b551c 100644 --- a/compiler/rustc_codegen_cranelift/src/driver/jit.rs +++ b/compiler/rustc_codegen_cranelift/src/driver/jit.rs @@ -6,6 +6,7 @@ use std::os::raw::{c_char, c_int}; use cranelift_jit::{JITBuilder, JITModule}; use rustc_codegen_ssa::CrateInfo; +use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::mir::mono::MonoItem; use rustc_session::Session; use rustc_span::sym; @@ -84,7 +85,7 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, jit_args: Vec<String>) -> ! { tcx.dcx().abort_if_errors(); - jit_module.finalize_definitions(); + let mut jit_module = jit_module.finalize_definitions(); println!( "Rustc codegen cranelift will JIT run the executable, because -Cllvm-args=mode=jit was passed" @@ -104,7 +105,7 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, jit_args: Vec<String>) -> ! { call_conv: jit_module.target_config().default_call_conv, }; let start_func_id = jit_module.declare_function("main", Linkage::Import, &start_sig).unwrap(); - let finalized_start: *const u8 = jit_module.module.get_finalized_function(start_func_id); + let finalized_start: *const u8 = jit_module.get_finalized_function(start_func_id); let f: extern "C" fn(c_int, *const *const c_char) -> c_int = unsafe { ::std::mem::transmute(finalized_start) }; @@ -119,7 +120,7 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, jit_args: Vec<String>) -> ! 
{ std::process::exit(ret); } -pub(crate) fn codegen_and_compile_fn<'tcx>( +fn codegen_and_compile_fn<'tcx>( tcx: TyCtxt<'tcx>, cx: &mut crate::CodegenCx, cached_context: &mut Context, diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs index eb0dfbb69c3..2e02e85a997 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm.rs @@ -66,7 +66,12 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( fx.tcx .dcx() .warn(format!("unsupported llvm intrinsic {}; replacing with trap", intrinsic)); - crate::trap::trap_unimplemented(fx, intrinsic); + let msg = format!( + "{intrinsic} is not yet supported.\n\ + See https://github.com/rust-lang/rustc_codegen_cranelift/issues/171\n\ + Please open an issue at https://github.com/rust-lang/rustc_codegen_cranelift/issues" + ); + crate::base::codegen_panic_nounwind(fx, &msg, None); return; } } diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs index 387c87d123a..d22483cf177 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_aarch64.rs @@ -7,7 +7,7 @@ use crate::inline_asm::{CInlineAsmOperand, codegen_inline_asm_inner}; use crate::intrinsics::*; use crate::prelude::*; -pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( +pub(super) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: &str, args: &[Spanned<mir::Operand<'tcx>>], @@ -507,7 +507,12 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( "unsupported AArch64 llvm intrinsic {}; replacing with trap", intrinsic )); - crate::trap::trap_unimplemented(fx, intrinsic); + let msg = format!( + "{intrinsic} is not yet supported.\n\ + See https://github.com/rust-lang/rustc_codegen_cranelift/issues/171\n\ + Please open an issue at https://github.com/rust-lang/rustc_codegen_cranelift/issues" + ); + crate::base::codegen_panic_nounwind(fx, &msg, None); return; } } diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs index c02d31844e0..3d67913a8ff 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs @@ -7,7 +7,7 @@ use crate::inline_asm::{CInlineAsmOperand, codegen_inline_asm_inner}; use crate::intrinsics::*; use crate::prelude::*; -pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( +pub(super) fn codegen_x86_llvm_intrinsic_call<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, intrinsic: &str, args: &[Spanned<mir::Operand<'tcx>>], @@ -147,10 +147,10 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let offset = fx.bcx.ins().imul(index_lane, scale); let lane_ptr = fx.bcx.ins().iadd(ptr, offset); let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), lane_ptr, 0); - fx.bcx.ins().jump(next, &[res]); + fx.bcx.ins().jump(next, &[res.into()]); fx.bcx.switch_to_block(if_disabled); - fx.bcx.ins().jump(next, &[src_lane]); + fx.bcx.ins().jump(next, &[src_lane.into()]); fx.bcx.seal_block(next); fx.bcx.switch_to_block(next); @@ -1316,7 +1316,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( fx.tcx .dcx() .warn(format!("unsupported x86 llvm intrinsic {}; replacing with trap", intrinsic)); - crate::trap::trap_unimplemented(fx, intrinsic); + let msg = format!( + "{intrinsic} is not yet 
supported.\n\ + See https://github.com/rust-lang/rustc_codegen_cranelift/issues/171\n\ + Please open an issue at https://github.com/rust-lang/rustc_codegen_cranelift/issues" + ); + crate::base::codegen_panic_nounwind(fx, &msg, None); return; } } diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs index 9018d78b00a..b21ca32c9a2 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs @@ -27,6 +27,7 @@ use rustc_span::{Symbol, sym}; pub(crate) use self::llvm::codegen_llvm_intrinsic_call; use crate::cast::clif_intcast; +use crate::codegen_f16_f128; use crate::prelude::*; fn bug_on_incorrect_arg_count(intrinsic: impl std::fmt::Display) -> ! { @@ -248,8 +249,10 @@ fn bool_to_zero_or_max_uint<'tcx>( let ty = fx.clif_type(ty).unwrap(); let int_ty = match ty { + types::F16 => types::I16, types::F32 => types::I32, types::F64 => types::I64, + types::F128 => types::I128, ty => ty, }; @@ -308,45 +311,83 @@ fn codegen_float_intrinsic_call<'tcx>( ret: CPlace<'tcx>, ) -> bool { let (name, arg_count, ty, clif_ty) = match intrinsic { + sym::expf16 => ("expf16", 1, fx.tcx.types.f16, types::F16), sym::expf32 => ("expf", 1, fx.tcx.types.f32, types::F32), sym::expf64 => ("exp", 1, fx.tcx.types.f64, types::F64), + sym::expf128 => ("expf128", 1, fx.tcx.types.f128, types::F128), + sym::exp2f16 => ("exp2f16", 1, fx.tcx.types.f16, types::F16), sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32, types::F32), sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64, types::F64), + sym::exp2f128 => ("exp2f128", 1, fx.tcx.types.f128, types::F128), + sym::sqrtf16 => ("sqrtf16", 1, fx.tcx.types.f16, types::F16), sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32, types::F32), sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64, types::F64), + sym::sqrtf128 => ("sqrtf128", 1, fx.tcx.types.f128, types::F128), + sym::powif16 => ("__powisf2", 2, fx.tcx.types.f16, types::F16), // compiler-builtins sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32, types::F32), // compiler-builtins sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64, types::F64), // compiler-builtins + sym::powif128 => ("__powitf2", 2, fx.tcx.types.f128, types::F128), // compiler-builtins + sym::powf16 => ("powf16", 2, fx.tcx.types.f16, types::F16), sym::powf32 => ("powf", 2, fx.tcx.types.f32, types::F32), sym::powf64 => ("pow", 2, fx.tcx.types.f64, types::F64), + sym::powf128 => ("powf128", 2, fx.tcx.types.f128, types::F128), + sym::logf16 => ("logf16", 1, fx.tcx.types.f16, types::F16), sym::logf32 => ("logf", 1, fx.tcx.types.f32, types::F32), sym::logf64 => ("log", 1, fx.tcx.types.f64, types::F64), + sym::logf128 => ("logf128", 1, fx.tcx.types.f128, types::F128), + sym::log2f16 => ("log2f16", 1, fx.tcx.types.f16, types::F16), sym::log2f32 => ("log2f", 1, fx.tcx.types.f32, types::F32), sym::log2f64 => ("log2", 1, fx.tcx.types.f64, types::F64), + sym::log2f128 => ("log2f128", 1, fx.tcx.types.f128, types::F128), + sym::log10f16 => ("log10f16", 1, fx.tcx.types.f16, types::F16), sym::log10f32 => ("log10f", 1, fx.tcx.types.f32, types::F32), sym::log10f64 => ("log10", 1, fx.tcx.types.f64, types::F64), + sym::log10f128 => ("log10f128", 1, fx.tcx.types.f128, types::F128), + sym::fabsf16 => ("fabsf16", 1, fx.tcx.types.f16, types::F16), sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32, types::F32), sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64, types::F64), + sym::fabsf128 => ("fabsf128", 1, fx.tcx.types.f128, types::F128), + sym::fmaf16 => ("fmaf16", 3, 
fx.tcx.types.f16, types::F16), sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32), sym::fmaf64 => ("fma", 3, fx.tcx.types.f64, types::F64), + sym::fmaf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation + sym::fmuladdf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f16 sym::fmuladdf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f32 sym::fmuladdf64 => ("fma", 3, fx.tcx.types.f64, types::F64), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f64 + sym::fmuladdf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f128 + sym::copysignf16 => ("copysignf16", 2, fx.tcx.types.f16, types::F16), sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32, types::F32), sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64, types::F64), + sym::copysignf128 => ("copysignf128", 2, fx.tcx.types.f128, types::F128), + sym::floorf16 => ("floorf16", 1, fx.tcx.types.f16, types::F16), sym::floorf32 => ("floorf", 1, fx.tcx.types.f32, types::F32), sym::floorf64 => ("floor", 1, fx.tcx.types.f64, types::F64), + sym::floorf128 => ("floorf128", 1, fx.tcx.types.f128, types::F128), + sym::ceilf16 => ("ceilf16", 1, fx.tcx.types.f16, types::F16), sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32, types::F32), sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64, types::F64), + sym::ceilf128 => ("ceilf128", 1, fx.tcx.types.f128, types::F128), + sym::truncf16 => ("truncf16", 1, fx.tcx.types.f16, types::F16), sym::truncf32 => ("truncf", 1, fx.tcx.types.f32, types::F32), sym::truncf64 => ("trunc", 1, fx.tcx.types.f64, types::F64), + sym::truncf128 => ("truncf128", 1, fx.tcx.types.f128, types::F128), + sym::round_ties_even_f16 => ("rintf16", 1, fx.tcx.types.f16, types::F16), sym::round_ties_even_f32 => ("rintf", 1, fx.tcx.types.f32, types::F32), sym::round_ties_even_f64 => ("rint", 1, fx.tcx.types.f64, types::F64), + sym::round_ties_even_f128 => ("rintf128", 1, fx.tcx.types.f128, types::F128), + sym::roundf16 => ("roundf16", 1, fx.tcx.types.f16, types::F16), sym::roundf32 => ("roundf", 1, fx.tcx.types.f32, types::F32), sym::roundf64 => ("round", 1, fx.tcx.types.f64, types::F64), + sym::roundf128 => ("roundf128", 1, fx.tcx.types.f128, types::F128), + sym::sinf16 => ("sinf16", 1, fx.tcx.types.f16, types::F16), sym::sinf32 => ("sinf", 1, fx.tcx.types.f32, types::F32), sym::sinf64 => ("sin", 1, fx.tcx.types.f64, types::F64), + sym::sinf128 => ("sinf128", 1, fx.tcx.types.f128, types::F128), + sym::cosf16 => ("cosf16", 1, fx.tcx.types.f16, types::F16), sym::cosf32 => ("cosf", 1, fx.tcx.types.f32, types::F32), sym::cosf64 => ("cos", 1, fx.tcx.types.f64, types::F64), + sym::cosf128 => ("cosf128", 1, fx.tcx.types.f128, types::F128), _ => return false, }; @@ -379,13 +420,26 @@ fn codegen_float_intrinsic_call<'tcx>( }; let layout = fx.layout_of(ty); + // FIXME(bytecodealliance/wasmtime#8312): Use native Cranelift operations + // for `f16` and `f128` once the lowerings have been implemented in Cranelift.
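// (For `fmaf16`/`fmuladdf16` just below, `fma_f16` in codegen_f16_f128.rs
// widens all three operands to f64, uses Cranelift's native `fma`, and
// narrows the result back; an f16 product always fits exactly in f64's
// 53-bit significand, so the widening itself loses nothing.)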
let res = match intrinsic { + sym::fmaf16 | sym::fmuladdf16 => { + CValue::by_val(codegen_f16_f128::fma_f16(fx, args[0], args[1], args[2]), layout) + } sym::fmaf32 | sym::fmaf64 | sym::fmuladdf32 | sym::fmuladdf64 => { CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout) } + sym::copysignf16 => { + CValue::by_val(codegen_f16_f128::copysign_f16(fx, args[0], args[1]), layout) + } + sym::copysignf128 => { + CValue::by_val(codegen_f16_f128::copysign_f128(fx, args[0], args[1]), layout) + } sym::copysignf32 | sym::copysignf64 => { CValue::by_val(fx.bcx.ins().fcopysign(args[0], args[1]), layout) } + sym::fabsf16 => CValue::by_val(codegen_f16_f128::abs_f16(fx, args[0]), layout), + sym::fabsf128 => CValue::by_val(codegen_f16_f128::abs_f128(fx, args[0]), layout), sym::fabsf32 | sym::fabsf64 | sym::floorf32 @@ -415,11 +469,36 @@ fn codegen_float_intrinsic_call<'tcx>( // These intrinsics aren't supported natively by Cranelift. // Lower them to a libcall. - sym::powif32 | sym::powif64 => { - let input_tys: Vec<_> = vec![AbiParam::new(clif_ty), AbiParam::new(types::I32)]; + sym::powif16 | sym::powif32 | sym::powif64 | sym::powif128 => { + let temp; + let (clif_ty, args) = if intrinsic == sym::powif16 { + temp = [codegen_f16_f128::f16_to_f32(fx, args[0]), args[1]]; + (types::F32, temp.as_slice()) + } else { + (clif_ty, args) + }; + let input_tys: Vec<_> = + vec![AbiParam::new(clif_ty), lib_call_arg_param(fx.tcx, types::I32, true)]; let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0]; + let ret_val = if intrinsic == sym::powif16 { + codegen_f16_f128::f32_to_f16(fx, ret_val) + } else { + ret_val + }; CValue::by_val(ret_val, fx.layout_of(ty)) } + sym::powf16 => { + // FIXME(f16_f128): Rust `compiler-builtins` doesn't export `powf16` yet. + let x = codegen_f16_f128::f16_to_f32(fx, args[0]); + let y = codegen_f16_f128::f16_to_f32(fx, args[1]); + let ret_val = fx.lib_call( + "powf", + vec![AbiParam::new(types::F32), AbiParam::new(types::F32)], + vec![AbiParam::new(types::F32)], + &[x, y], + )[0]; + CValue::by_val(codegen_f16_f128::f32_to_f16(fx, ret_val), fx.layout_of(ty)) + } _ => { let input_tys: Vec<_> = args.iter().map(|_| AbiParam::new(clif_ty)).collect(); let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0]; @@ -801,7 +880,11 @@ fn codegen_regular_intrinsic_call<'tcx>( // FIXME implement 128bit atomics if fx.tcx.is_compiler_builtins(LOCAL_CRATE) { // special case for compiler-builtins to avoid having to patch it - crate::trap::trap_unimplemented(fx, "128bit atomics not yet supported"); + crate::base::codegen_panic_nounwind( + fx, + "128bit atomics not yet supported", + None, + ); return Ok(()); } else { fx.tcx @@ -832,7 +915,11 @@ fn codegen_regular_intrinsic_call<'tcx>( // FIXME implement 128bit atomics if fx.tcx.is_compiler_builtins(LOCAL_CRATE) { // special case for compiler-builtins to avoid having to patch it - crate::trap::trap_unimplemented(fx, "128bit atomics not yet supported"); + crate::base::codegen_panic_nounwind( + fx, + "128bit atomics not yet supported", + None, + ); return Ok(()); } else { fx.tcx @@ -1109,6 +1196,20 @@ fn codegen_regular_intrinsic_call<'tcx>( ret.write_cvalue(fx, old); } + sym::minimumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmin` directly once + // Cranelift backend lowerings are implemented. 
+ let a = codegen_f16_f128::f16_to_f32(fx, a); + let b = codegen_f16_f128::f16_to_f32(fx, b); + let val = fx.bcx.ins().fmin(a, b); + let val = codegen_f16_f128::f32_to_f16(fx, val); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::minimumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1127,6 +1228,31 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::minimumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmin` once Cranelift + // backend lowerings are implemented. + let val = codegen_f16_f128::fmin_f128(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + sym::maximumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmax` directly once + // Cranelift backend lowerings are implemented. + let a = codegen_f16_f128::f16_to_f32(fx, a); + let b = codegen_f16_f128::f16_to_f32(fx, b); + let val = fx.bcx.ins().fmax(a, b); + let val = codegen_f16_f128::f32_to_f16(fx, val); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::maximumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1145,7 +1271,27 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::maximumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmax` once Cranelift + // backend lowerings are implemented. 
+ let val = codegen_f16_f128::fmax_f128(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + + sym::minnumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + let val = crate::num::codegen_float_min(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::minnumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1164,6 +1310,24 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::minnumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_min(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + sym::maxnumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_max(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::maxnumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1182,6 +1346,15 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::maxnumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_max(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } sym::catch_unwind => { intrinsic_args!(fx, args => (f, data, catch_fn); intrinsic); diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index dd6d8dbb6f5..46a441488fa 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -283,6 +283,20 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( ret_lane.write_cvalue(fx, val); } + sym::simd_insert_dyn => { + intrinsic_args!(fx, args => (base, idx, val); intrinsic); + + if !base.layout().ty.is_simd() { + report_simd_type_validation_error(fx, intrinsic, span, base.layout().ty); + return; + } + + let idx = idx.load_scalar(fx); + + ret.write_cvalue(fx, base); + ret.write_lane_dyn(fx, idx, val); + } + sym::simd_extract => { let (v, idx) = match args { [v, idx] => (v, idx), @@ -318,6 +332,20 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( ret.write_cvalue(fx, ret_lane); } + sym::simd_extract_dyn => { + intrinsic_args!(fx, args => (v, idx); intrinsic); + + if !v.layout().ty.is_simd() { + report_simd_type_validation_error(fx, intrinsic, span, v.layout().ty); + return; + } + + let idx = idx.load_scalar(fx); + + let ret_lane = v.value_lane_dyn(fx, idx); + ret.write_cvalue(fx, ret_lane); + } + sym::simd_neg | sym::simd_bswap | sym::simd_bitreverse @@ -980,10 +1008,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( fx.bcx.switch_to_block(if_enabled); let res = fx.bcx.ins().load(lane_clif_ty, MemFlags::trusted(), ptr_lane, 0); - fx.bcx.ins().jump(next, &[res]); + fx.bcx.ins().jump(next, &[res.into()]); fx.bcx.switch_to_block(if_disabled); - fx.bcx.ins().jump(next, &[val_lane]); + fx.bcx.ins().jump(next, &[val_lane.into()]); fx.bcx.seal_block(next); 
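// (Both arms of this diamond pass their lane value to `next` as a block
// argument; the `.into()` calls added throughout this file reflect a
// Cranelift API change where `jump` now takes `BlockArg`s, which wrap
// plain `Value`s, instead of bare `Value`s.)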
fx.bcx.switch_to_block(next); @@ -1029,10 +1057,10 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( ptr_val, Offset32::new(offset), ); - fx.bcx.ins().jump(next, &[res]); + fx.bcx.ins().jump(next, &[res.into()]); fx.bcx.switch_to_block(if_disabled); - fx.bcx.ins().jump(next, &[val_lane]); + fx.bcx.ins().jump(next, &[val_lane.into()]); fx.bcx.seal_block(next); fx.bcx.switch_to_block(next); diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs index ab09a6f8b38..8ef623cde00 100644 --- a/compiler/rustc_codegen_cranelift/src/lib.rs +++ b/compiler/rustc_codegen_cranelift/src/lib.rs @@ -6,6 +6,9 @@ #![cfg_attr(doc, feature(rustdoc_internals))] // Note: please avoid adding other feature gates where possible #![feature(rustc_private)] +// Only used to define intrinsics in `compiler_builtins.rs`. +#![feature(f16)] +#![feature(f128)] // Note: please avoid adding other feature gates where possible #![warn(rust_2018_idioms)] #![warn(unreachable_pub)] @@ -57,6 +60,7 @@ mod allocator; mod analyze; mod base; mod cast; +mod codegen_f16_f128; mod codegen_i128; mod common; mod compiler_builtins; @@ -76,7 +80,6 @@ mod optimize; mod pointer; mod pretty_clif; mod toolchain; -mod trap; mod unsize; mod unwind_module; mod value_and_place; @@ -198,14 +201,36 @@ impl CodegenBackend for CraneliftCodegenBackend { // FIXME do `unstable_target_features` properly let unstable_target_features = target_features.clone(); + // FIXME(f16_f128): LLVM 20 (currently used by `rustc`) passes `f128` in XMM registers on + // Windows, whereas LLVM 21+ and Cranelift pass it indirectly. This means that `f128` won't + // work when linking against a LLVM-built sysroot. + let has_reliable_f128 = !sess.target.is_like_windows; + let has_reliable_f16 = match &*sess.target.arch { + // FIXME(f16_f128): LLVM 20 does not support `f16` on s390x, meaning the required + // builtins are not available in `compiler-builtins`. + "s390x" => false, + // FIXME(f16_f128): `rustc_codegen_llvm` currently disables support on Windows GNU + // targets due to GCC using a different ABI than LLVM. Therefore `f16` won't be + // available when using a LLVM-built sysroot. + "x86_64" + if sess.target.os == "windows" + && sess.target.env == "gnu" + && sess.target.abi != "llvm" => + { + false + } + _ => true, + }; + TargetConfig { target_features, unstable_target_features, - // Cranelift does not yet support f16 or f128 - has_reliable_f16: false, - has_reliable_f16_math: false, - has_reliable_f128: false, - has_reliable_f128_math: false, + // `rustc_codegen_cranelift` polyfills functionality not yet + // available in Cranelift. + has_reliable_f16, + has_reliable_f16_math: has_reliable_f16, + has_reliable_f128, + has_reliable_f128_math: has_reliable_f128, } } @@ -290,6 +315,12 @@ fn build_isa(sess: &Session, jit: bool) -> Arc<dyn TargetIsa + 'static> { flags_builder.set("enable_llvm_abi_extensions", "true").unwrap(); + if let Some(align) = sess.opts.unstable_opts.min_function_alignment { + flags_builder + .set("log2_min_function_alignment", &align.bytes().ilog2().to_string()) + .unwrap(); + } + use rustc_session::config::OptLevel; match sess.opts.optimize { OptLevel::No => { diff --git a/compiler/rustc_codegen_cranelift/src/num.rs b/compiler/rustc_codegen_cranelift/src/num.rs index 2a4d1e3ae57..f53045df6e7 100644 --- a/compiler/rustc_codegen_cranelift/src/num.rs +++ b/compiler/rustc_codegen_cranelift/src/num.rs @@ -1,8 +1,9 @@ //! 
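One note on the `build_isa` hunk above: `-Zmin-function-alignment` carries a power-of-two byte count, while Cranelift's `log2_min_function_alignment` flag wants its base-2 logarithm, hence the `ilog2` call. A tiny sketch of that conversion (the helper name is hypothetical):

```rust
// -Zmin-function-alignment is a power-of-two byte count; Cranelift's flag
// takes its log2 as a string, mirroring the `build_isa` change above.
fn log2_alignment_flag(align_bytes: u64) -> String {
    assert!(align_bytes.is_power_of_two());
    align_bytes.ilog2().to_string()
}

fn main() {
    // align = 16 bytes -> "log2_min_function_alignment" = "4"
    assert_eq!(log2_alignment_flag(16), "4");
}
```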
Various operations on integer and floating-point numbers +use crate::codegen_f16_f128; use crate::prelude::*; -pub(crate) fn bin_op_to_intcc(bin_op: BinOp, signed: bool) -> IntCC { +fn bin_op_to_intcc(bin_op: BinOp, signed: bool) -> IntCC { use BinOp::*; use IntCC::*; match bin_op { @@ -109,7 +110,7 @@ pub(crate) fn codegen_binop<'tcx>( } } -pub(crate) fn codegen_bool_binop<'tcx>( +fn codegen_bool_binop<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, bin_op: BinOp, in_lhs: CValue<'tcx>, @@ -350,25 +351,60 @@ pub(crate) fn codegen_float_binop<'tcx>( let lhs = in_lhs.load_scalar(fx); let rhs = in_rhs.load_scalar(fx); + // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings have + // been added to Cranelift. + let (lhs, rhs) = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) { + (codegen_f16_f128::f16_to_f32(fx, lhs), codegen_f16_f128::f16_to_f32(fx, rhs)) + } else { + (lhs, rhs) + }; let b = fx.bcx.ins(); let res = match bin_op { + // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings + // have been added to Cranelift. + BinOp::Add | BinOp::Sub | BinOp::Mul | BinOp::Div + if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F128) => + { + codegen_f16_f128::codegen_f128_binop(fx, bin_op, lhs, rhs) + } BinOp::Add => b.fadd(lhs, rhs), BinOp::Sub => b.fsub(lhs, rhs), BinOp::Mul => b.fmul(lhs, rhs), BinOp::Div => b.fdiv(lhs, rhs), BinOp::Rem => { - let (name, ty) = match in_lhs.layout().ty.kind() { - ty::Float(FloatTy::F32) => ("fmodf", types::F32), - ty::Float(FloatTy::F64) => ("fmod", types::F64), + let (name, ty, lhs, rhs) = match in_lhs.layout().ty.kind() { + ty::Float(FloatTy::F16) => ( + "fmodf", + types::F32, + // FIXME(bytecodealliance/wasmtime#8312): Already converted + // by the FIXME above. + // fx.bcx.ins().fpromote(types::F32, lhs), + // fx.bcx.ins().fpromote(types::F32, rhs), + lhs, + rhs, + ), + ty::Float(FloatTy::F32) => ("fmodf", types::F32, lhs, rhs), + ty::Float(FloatTy::F64) => ("fmod", types::F64, lhs, rhs), + ty::Float(FloatTy::F128) => ("fmodf128", types::F128, lhs, rhs), _ => bug!(), }; - fx.lib_call( + let ret_val = fx.lib_call( name, vec![AbiParam::new(ty), AbiParam::new(ty)], vec![AbiParam::new(ty)], &[lhs, rhs], - )[0] + )[0]; + + let ret_val = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) { + // FIXME(bytecodealliance/wasmtime#8312): Use native Cranelift + // operation once Cranelift backend lowerings have been + // implemented. + codegen_f16_f128::f32_to_f16(fx, ret_val) + } else { + ret_val + }; + return CValue::by_val(ret_val, in_lhs.layout()); } BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => { let fltcc = match bin_op { @@ -380,16 +416,26 @@ pub(crate) fn codegen_float_binop<'tcx>( BinOp::Gt => FloatCC::GreaterThan, _ => unreachable!(), }; - let val = fx.bcx.ins().fcmp(fltcc, lhs, rhs); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift + // `fcmp` once `f16`/`f128` backend lowerings have been added to + // Cranelift. + let val = codegen_f16_f128::fcmp(fx, fltcc, lhs, rhs); return CValue::by_val(val, fx.layout_of(fx.tcx.types.bool)); } _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs), }; + // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings have + // been added to Cranelift. 
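The FIXME above covers the `f16` fallback path: both operands are promoted to `f32`, the operation runs once at the wider width, and the result is demoted back down. Since `f32`'s 24-bit significand meets the 2p+2 bound for p = 11, this round trip is correctly rounded for add, sub, mul, and div. A sketch of the semantics on nightly Rust (illustrative only, not the backend code):

```rust
#![feature(f16)]

// Illustrative model of the polyfill: promote both f16 operands to f32,
// operate once, then round the result back down to f16.
fn f16_add(a: f16, b: f16) -> f16 {
    let wide = (a as f32) + (b as f32); // promote, then a single f32 add
    wide as f16 // demote; correctly rounded for +, -, *, / at this width
}

fn main() {
    let x: f16 = 1.5;
    let y: f16 = 0.25;
    assert_eq!(f16_add(x, y), 1.75);
}
```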
+ let res = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) { + codegen_f16_f128::f32_to_f16(fx, res) + } else { + res + }; CValue::by_val(res, in_lhs.layout()) } -pub(crate) fn codegen_ptr_binop<'tcx>( +fn codegen_ptr_binop<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, bin_op: BinOp, in_lhs: CValue<'tcx>, @@ -457,15 +503,19 @@ pub(crate) fn codegen_ptr_binop<'tcx>( // and `a.is_nan() ? b : (a <= b ? b : a)` for `maxnumf*`. NaN checks are done by comparing // a float against itself. Only in case of NaN is it not equal to itself. pub(crate) fn codegen_float_min(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { - let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a); - let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift `fcmp` once + // `f16`/`f128` backend lowerings have been added to Cranelift. + let a_is_nan = codegen_f16_f128::fcmp(fx, FloatCC::NotEqual, a, a); + let a_ge_b = codegen_f16_f128::fcmp(fx, FloatCC::GreaterThanOrEqual, a, b); let temp = fx.bcx.ins().select(a_ge_b, b, a); fx.bcx.ins().select(a_is_nan, b, temp) } pub(crate) fn codegen_float_max(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { - let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a); - let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift `fcmp` once + // `f16`/`f128` backend lowerings have been added to Cranelift. + let a_is_nan = codegen_f16_f128::fcmp(fx, FloatCC::NotEqual, a, a); + let a_le_b = codegen_f16_f128::fcmp(fx, FloatCC::LessThanOrEqual, a, b); let temp = fx.bcx.ins().select(a_le_b, b, a); fx.bcx.ins().select(a_is_nan, b, temp) } diff --git a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs index cd254b04ed9..9400ae9fcff 100644 --- a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs +++ b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs @@ -8,40 +8,41 @@ //! target x86_64 //! //! function u0:22(i64) -> i8, i8 system_v { -//! ; symbol _ZN97_$LT$example..IsNotEmpty$u20$as$u20$mini_core..FnOnce$LT$$LP$$RF$$RF$$u5b$u16$u5d$$C$$RP$$GT$$GT$9call_once17hd517c453d67c0915E -//! ; instance Instance { def: Item(WithOptConstParam { did: DefId(0:42 ~ example[4e51]::{impl#0}::call_once), const_param_did: None }), args: [ReErased, ReErased] } -//! 
; abi FnAbi { args: [ArgAbi { layout: TyAndLayout { ty: IsNotEmpty, layout: Layout { size: Size(0 bytes), align: AbiAndPrefAlign { abi: Align(1 bytes), pref: Align(8 bytes) }, abi: Aggregate { sized: true }, fields: Arbitrary { offsets: [], memory_index: [] }, largest_niche: None, variants: Single { index: 0 } } }, mode: Ignore }, ArgAbi { layout: TyAndLayout { ty: &&[u16], layout: Layout { size: Size(8 bytes), align: AbiAndPrefAlign { abi: Align(8 bytes), pref: Align(8 bytes) }, abi: Scalar(Initialized { value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), fields: Primitive, largest_niche: Some(Niche { offset: Size(0 bytes), value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), variants: Single { index: 0 } } }, mode: Direct(ArgAttributes { regular: NonNull | NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: Some(Align(8 bytes)) }) }], ret: ArgAbi { layout: TyAndLayout { ty: (u8, u8), layout: Layout { size: Size(2 bytes), align: AbiAndPrefAlign { abi: Align(1 bytes), pref: Align(8 bytes) }, abi: ScalarPair(Initialized { value: Int(I8, false), valid_range: 0..=255 }, Initialized { value: Int(I8, false), valid_range: 0..=255 }), fields: Arbitrary { offsets: [Size(0 bytes), Size(1 bytes)], memory_index: [0, 1] }, largest_niche: None, variants: Single { index: 0 } } }, mode: Pair(ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }, ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }) }, c_variadic: false, fixed_count: 1, conv: Rust, can_unwind: false } +//! ; symbol _ZN97_$LT$example..IsNotEmpty$u20$as$u20$mini_core..FnOnce$LT$$LP$$RF$$RF$$u5b$u16$u5d$$C$$RP$$GT$$GT$9call_once17hd361e9f5c3d1c4deE +//! ; instance Instance { def: Item(DefId(0:42 ~ example[3895]::{impl#0}::call_once)), args: ['{erased}, '{erased}] } +//! 
; abi FnAbi { args: [ArgAbi { layout: TyAndLayout { ty: IsNotEmpty, layout: Layout { size: Size(0 bytes), align: AbiAndPrefAlign { abi: Align(1 bytes), pref: Align(8 bytes) }, backend_repr: Memory { sized: true }, fields: Arbitrary { offsets: [], memory_index: [] }, largest_niche: None, uninhabited: false, variants: Single { index: 0 }, max_repr_align: None, unadjusted_abi_align: Align(1 bytes), randomization_seed: 12266848898570219025 } }, mode: Ignore }, ArgAbi { layout: TyAndLayout { ty: &&[u16], layout: Layout { size: Size(8 bytes), align: AbiAndPrefAlign { abi: Align(8 bytes), pref: Align(8 bytes) }, backend_repr: Scalar(Initialized { value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), fields: Primitive, largest_niche: Some(Niche { offset: Size(0 bytes), value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), uninhabited: false, variants: Single { index: 0 }, max_repr_align: None, unadjusted_abi_align: Align(8 bytes), randomization_seed: 281492156579847 } }, mode: Direct(ArgAttributes { regular: NonNull | NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: Some(Align(8 bytes)) }) }], ret: ArgAbi { layout: TyAndLayout { ty: (u8, u8), layout: Layout { size: Size(2 bytes), align: AbiAndPrefAlign { abi: Align(1 bytes), pref: Align(8 bytes) }, backend_repr: ScalarPair(Initialized { value: Int(I8, false), valid_range: 0..=255 }, Initialized { value: Int(I8, false), valid_range: 0..=255 }), fields: Arbitrary { offsets: [Size(0 bytes), Size(1 bytes)], memory_index: [0, 1] }, largest_niche: None, uninhabited: false, variants: Single { index: 0 }, max_repr_align: None, unadjusted_abi_align: Align(1 bytes), randomization_seed: 71776127651151873 } }, mode: Pair(ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }, ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }) }, c_variadic: false, fixed_count: 1, conv: Rust, can_unwind: false } //! //! ; kind loc.idx param pass mode ty -//! ; ssa _0 (u8, u8) 2b 1, 8 var=(0, 1) +//! ; ssa _0 (u8, u8) 2b 1 var=(0, 1) //! ; ret _0 - Pair(ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }, ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }) (u8, u8) //! ; arg _1 - Ignore IsNotEmpty -//! ; arg _2.0 = v0 Direct(ArgAttributes { regular: NonNull | NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: Some(Align(8 bytes)) }) &&[u16] +//! ; arg _2.0 = v0 Direct(ArgAttributes { regular: NonNull | NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: Some(Align(8 bytes)) }) &'{erased} &'{erased} [u16] //! -//! ; kind local ty size align (abi,pref) -//! ; zst _1 IsNotEmpty 0b 1, 8 align=8,offset= -//! ; stack _2 (&&[u16],) 8b 8, 8 storage=ss0 -//! ; ssa _3 &mut IsNotEmpty 8b 8, 8 var=2 +//! ; kind local ty size align (abi) +//! ; zst _1 IsNotEmpty 0b 1 align=1,offset= +//! ; stack _2 (&'{erased} &'{erased} [u16],) 8b 8 storage=ss0 +//! ; ssa _3 &'{erased} mut IsNotEmpty 8b 8 var=2 //! -//! ss0 = explicit_slot 16 +//! ss0 = explicit_slot 16, align = 16 //! sig0 = (i64, i64) -> i8, i8 system_v -//! fn0 = colocated u0:23 sig0 ; Instance { def: Item(WithOptConstParam { did: DefId(0:46 ~ example[4e51]::{impl#1}::call_mut), const_param_did: None }), args: [ReErased, ReErased] } +//! 
fn0 = colocated u0:23 sig0 ; Instance { def: Item(DefId(0:46 ~ example[3895]::{impl#1}::call_mut)), args: ['{erased}, '{erased}] } //! //! block0(v0: i64): //! nop -//! ; write_cvalue: Addr(Pointer { base: Stack(ss0), offset: Offset32(0) }, None): &&[u16] <- ByVal(v0): &&[u16] +//! ; write_cvalue: Addr(Pointer { base: Stack(ss0), offset: Offset32(0) }, None): &'{erased} &'{erased} [u16] <- ByVal(v0): &'{erased} &'{erased} [u16] //! stack_store v0, ss0 //! jump block1 //! //! block1: //! nop //! ; _3 = &mut _1 -//! v1 = iconst.i64 8 -//! ; write_cvalue: Var(_3, var2): &mut IsNotEmpty <- ByVal(v1): &mut IsNotEmpty +//! v1 = iconst.i64 1 +//! ; write_cvalue: Var(_3, var2): &'{erased} mut IsNotEmpty <- ByVal(v1): &'{erased} mut IsNotEmpty //! ; -//! ; _0 = <IsNotEmpty as mini_core::FnMut<(&&[u16],)>>::call_mut(move _3, _2) +//! ; _0 = <IsNotEmpty as mini_core::FnMut<(&&[u16],)>>::call_mut(move _3, copy _2) //! v2 = stack_load.i64 ss0 -//! v3, v4 = call fn0(v1, v2) ; v1 = 8 +//! ; abi: FnAbi { args: [ArgAbi { layout: TyAndLayout { ty: &mut IsNotEmpty, layout: Layout { size: Size(8 bytes), align: AbiAndPrefAlign { abi: Align(8 bytes), pref: Align(8 bytes) }, backend_repr: Scalar(Initialized { value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), fields: Primitive, largest_niche: Some(Niche { offset: Size(0 bytes), value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), uninhabited: false, variants: Single { index: 0 }, max_repr_align: None, unadjusted_abi_align: Align(8 bytes), randomization_seed: 281492156579847 } }, mode: Direct(ArgAttributes { regular: NonNull | NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: Some(Align(1 bytes)) }) }, ArgAbi { layout: TyAndLayout { ty: &&[u16], layout: Layout { size: Size(8 bytes), align: AbiAndPrefAlign { abi: Align(8 bytes), pref: Align(8 bytes) }, backend_repr: Scalar(Initialized { value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), fields: Primitive, largest_niche: Some(Niche { offset: Size(0 bytes), value: Pointer(AddressSpace(0)), valid_range: 1..=18446744073709551615 }), uninhabited: false, variants: Single { index: 0 }, max_repr_align: None, unadjusted_abi_align: Align(8 bytes), randomization_seed: 281492156579847 } }, mode: Direct(ArgAttributes { regular: NonNull | NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: Some(Align(8 bytes)) }) }], ret: ArgAbi { layout: TyAndLayout { ty: (u8, u8), layout: Layout { size: Size(2 bytes), align: AbiAndPrefAlign { abi: Align(1 bytes), pref: Align(8 bytes) }, backend_repr: ScalarPair(Initialized { value: Int(I8, false), valid_range: 0..=255 }, Initialized { value: Int(I8, false), valid_range: 0..=255 }), fields: Arbitrary { offsets: [Size(0 bytes), Size(1 bytes)], memory_index: [0, 1] }, largest_niche: None, uninhabited: false, variants: Single { index: 0 }, max_repr_align: None, unadjusted_abi_align: Align(1 bytes), randomization_seed: 71776127651151873 } }, mode: Pair(ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }, ArgAttributes { regular: NoUndef, arg_ext: None, pointee_size: Size(0 bytes), pointee_align: None }) }, c_variadic: false, fixed_count: 1, conv: Rust, can_unwind: false } +//! v3, v4 = call fn0(v1, v2) ; v1 = 1 //! v5 -> v3 //! v6 -> v4 //! 
; write_cvalue: VarPair(_0, var0, var1): (u8, u8) <- ByValPair(v3, v4): (u8, u8) @@ -73,6 +74,7 @@ pub(crate) struct CommentWriter { enabled: bool, global_comments: Vec<String>, entity_comments: FxHashMap<AnyEntity, String>, + inst_post_comments: FxHashMap<Inst, String>, } impl CommentWriter { @@ -95,7 +97,12 @@ impl CommentWriter { vec![] }; - CommentWriter { enabled, global_comments, entity_comments: FxHashMap::default() } + CommentWriter { + enabled, + global_comments, + entity_comments: FxHashMap::default(), + inst_post_comments: FxHashMap::default(), + } } } @@ -127,6 +134,25 @@ impl CommentWriter { } } } + + pub(crate) fn add_post_comment<S: Into<String> + AsRef<str>>( + &mut self, + entity: Inst, + comment: S, + ) { + debug_assert!(self.enabled); + + use std::collections::hash_map::Entry; + match self.inst_post_comments.entry(entity) { + Entry::Occupied(mut occ) => { + occ.get_mut().push('\n'); + occ.get_mut().push_str(comment.as_ref()); + } + Entry::Vacant(vac) => { + vac.insert(comment.into()); + } + } + } } impl FuncWriter for &'_ CommentWriter { @@ -188,10 +214,13 @@ impl FuncWriter for &'_ CommentWriter { inst: Inst, indent: usize, ) -> fmt::Result { - PlainWriter.write_instruction(w, func, aliases, inst, indent)?; if let Some(comment) = self.entity_comments.get(&inst.into()) { writeln!(w, "; {}", comment.replace('\n', "\n; "))?; } + PlainWriter.write_instruction(w, func, aliases, inst, indent)?; + if let Some(comment) = self.inst_post_comments.get(&inst) { + writeln!(w, "; {}", comment.replace('\n', "\n; "))?; + } Ok(()) } } @@ -208,6 +237,14 @@ impl FunctionCx<'_, '_, '_> { ) { self.clif_comments.add_comment(entity, comment); } + + pub(crate) fn add_post_comment<S: Into<String> + AsRef<str>>( + &mut self, + entity: Inst, + comment: S, + ) { + self.clif_comments.add_post_comment(entity, comment); + } } pub(crate) fn should_write_ir(tcx: TyCtxt<'_>) -> bool { diff --git a/compiler/rustc_codegen_cranelift/src/trap.rs b/compiler/rustc_codegen_cranelift/src/trap.rs deleted file mode 100644 index ac3f58ee1ee..00000000000 --- a/compiler/rustc_codegen_cranelift/src/trap.rs +++ /dev/null @@ -1,38 +0,0 @@ -//! Helpers used to print a message and abort in case of certain panics and some detected UB. - -use crate::prelude::*; - -fn codegen_print(fx: &mut FunctionCx<'_, '_, '_>, msg: &str) { - let puts = fx - .module - .declare_function( - "puts", - Linkage::Import, - &Signature { - call_conv: fx.target_config.default_call_conv, - params: vec![AbiParam::new(fx.pointer_type)], - returns: vec![AbiParam::new(types::I32)], - }, - ) - .unwrap(); - let puts = fx.module.declare_func_in_func(puts, &mut fx.bcx.func); - if fx.clif_comments.enabled() { - fx.add_comment(puts, "puts"); - } - - let real_msg = format!("trap at {:?} ({}): {}\0", fx.instance, fx.symbol_name, msg); - let msg_ptr = fx.anonymous_str(&real_msg); - fx.bcx.ins().call(puts, &[msg_ptr]); -} - -/// Use this when something is unimplemented, but `libcore` or `libstd` requires it to codegen. 
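Back in `pretty_clif.rs`, `add_post_comment` uses the standard `Entry` append-or-insert shape so that several comments attached to one instruction stack up on separate lines instead of overwriting each other. A standalone model with plain `u32` keys standing in for `Inst` (names hypothetical):

```rust
use std::collections::HashMap;
use std::collections::hash_map::Entry;

// Standalone model of the post-comment map: later comments for the same
// instruction are appended on new lines rather than replacing earlier ones.
fn add_post_comment(map: &mut HashMap<u32, String>, inst: u32, comment: &str) {
    match map.entry(inst) {
        Entry::Occupied(mut occ) => {
            occ.get_mut().push('\n');
            occ.get_mut().push_str(comment);
        }
        Entry::Vacant(vac) => {
            vac.insert(comment.to_owned());
        }
    }
}

fn main() {
    let mut m = HashMap::new();
    add_post_comment(&mut m, 7, "lib_call fmodf");
    add_post_comment(&mut m, 7, "abi: ...");
    assert_eq!(m[&7], "lib_call fmodf\nabi: ...");
}
```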
-/// -/// Trap code: user65535 -pub(crate) fn trap_unimplemented(fx: &mut FunctionCx<'_, '_, '_>, msg: impl AsRef<str>) { - codegen_print(fx, msg.as_ref()); - - let one = fx.bcx.ins().iconst(types::I32, 1); - fx.lib_call("exit", vec![AbiParam::new(types::I32)], vec![], &[one]); - - fx.bcx.ins().trap(TrapCode::user(3).unwrap()); -} diff --git a/compiler/rustc_codegen_cranelift/src/unwind_module.rs b/compiler/rustc_codegen_cranelift/src/unwind_module.rs index b950aaa29ce..b4eb939cf25 100644 --- a/compiler/rustc_codegen_cranelift/src/unwind_module.rs +++ b/compiler/rustc_codegen_cranelift/src/unwind_module.rs @@ -1,10 +1,10 @@ +use cranelift_codegen::Context; use cranelift_codegen::control::ControlPlane; -use cranelift_codegen::ir::{Function, Signature}; +use cranelift_codegen::ir::Signature; use cranelift_codegen::isa::{TargetFrontendConfig, TargetIsa}; -use cranelift_codegen::{Context, FinalizedMachReloc}; use cranelift_module::{ DataDescription, DataId, FuncId, FuncOrDataId, Linkage, Module, ModuleDeclarations, - ModuleResult, + ModuleReloc, ModuleResult, }; use cranelift_object::{ObjectModule, ObjectProduct}; @@ -17,8 +17,8 @@ pub(crate) struct UnwindModule<T> { } impl<T: Module> UnwindModule<T> { - pub(crate) fn new(module: T, pic_eh_frame: bool) -> Self { - let unwind_context = UnwindContext::new(module.isa(), pic_eh_frame); + pub(crate) fn new(mut module: T, pic_eh_frame: bool) -> Self { + let unwind_context = UnwindContext::new(&mut module, pic_eh_frame); UnwindModule { module, unwind_context } } } @@ -33,13 +33,10 @@ impl UnwindModule<ObjectModule> { #[cfg(feature = "jit")] impl UnwindModule<cranelift_jit::JITModule> { - pub(crate) fn finalize_definitions(&mut self) { + pub(crate) fn finalize_definitions(mut self) -> cranelift_jit::JITModule { self.module.finalize_definitions().unwrap(); - let prev_unwind_context = std::mem::replace( - &mut self.unwind_context, - UnwindContext::new(self.module.isa(), false), - ); - unsafe { prev_unwind_context.register_jit(&self.module) }; + unsafe { self.unwind_context.register_jit(&self.module) }; + self.module } } @@ -94,17 +91,16 @@ impl<T: Module> Module for UnwindModule<T> { ctrl_plane: &mut ControlPlane, ) -> ModuleResult<()> { self.module.define_function_with_control_plane(func, ctx, ctrl_plane)?; - self.unwind_context.add_function(func, ctx, self.module.isa()); + self.unwind_context.add_function(&mut self.module, func, ctx); Ok(()) } fn define_function_bytes( &mut self, _func_id: FuncId, - _func: &Function, _alignment: u64, _bytes: &[u8], - _relocs: &[FinalizedMachReloc], + _relocs: &[ModuleReloc], ) -> ModuleResult<()> { unimplemented!() } diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs index f8a19589fdd..cbfb215a892 100644 --- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs +++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs @@ -309,6 +309,7 @@ impl<'tcx> CValue<'tcx> { match self.0 { CValueInner::ByVal(_) | CValueInner::ByValPair(_, _) => unreachable!(), CValueInner::ByRef(ptr, None) => { + let lane_idx = clif_intcast(fx, lane_idx, fx.pointer_type, false); let field_offset = fx.bcx.ins().imul_imm(lane_idx, lane_layout.size.bytes() as i64); let field_ptr = ptr.offset_value(fx, field_offset); CValue::by_ref(field_ptr, lane_layout) @@ -324,7 +325,7 @@ impl<'tcx> CValue<'tcx> { const_val: ty::ScalarInt, ) -> CValue<'tcx> { assert_eq!(const_val.size(), layout.size, "{:#?}: {:?}", const_val, layout); - use 
cranelift_codegen::ir::immediates::{Ieee32, Ieee64}; + use cranelift_codegen::ir::immediates::{Ieee16, Ieee32, Ieee64, Ieee128}; let clif_ty = fx.clif_type(layout.ty).unwrap(); @@ -345,12 +346,24 @@ impl<'tcx> CValue<'tcx> { let raw_val = const_val.size().truncate(const_val.to_bits(layout.size)); fx.bcx.ins().iconst(clif_ty, raw_val as i64) } + ty::Float(FloatTy::F16) => { + fx.bcx.ins().f16const(Ieee16::with_bits(u16::try_from(const_val).unwrap())) + } ty::Float(FloatTy::F32) => { fx.bcx.ins().f32const(Ieee32::with_bits(u32::try_from(const_val).unwrap())) } ty::Float(FloatTy::F64) => { fx.bcx.ins().f64const(Ieee64::with_bits(u64::try_from(const_val).unwrap())) } + ty::Float(FloatTy::F128) => { + let value = fx + .bcx + .func + .dfg + .constants + .insert(Ieee128::with_bits(u128::try_from(const_val).unwrap()).into()); + fx.bcx.ins().f128const(value) + } _ => panic!( "CValue::const_val for non bool/char/float/integer/pointer type {:?} is not allowed", layout.ty @@ -563,27 +576,7 @@ impl<'tcx> CPlace<'tcx> { src_ty, dst_ty, ); - let data = match (src_ty, dst_ty) { - (_, _) if src_ty == dst_ty => data, - - // This is a `write_cvalue_transmute`. - (types::I32, types::F32) - | (types::F32, types::I32) - | (types::I64, types::F64) - | (types::F64, types::I64) => codegen_bitcast(fx, dst_ty, data), - _ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data), - _ if src_ty.is_vector() || dst_ty.is_vector() => { - // FIXME(bytecodealliance/wasmtime#6104) do something more efficient for transmutes between vectors and integers. - let ptr = fx.create_stack_slot(src_ty.bytes(), src_ty.bytes()); - ptr.store(fx, data, MemFlags::trusted()); - ptr.load(fx, dst_ty, MemFlags::trusted()) - } - - // `CValue`s should never contain SSA-only types, so if you ended - // up here having seen an error like `B1 -> I8`, then before - // calling `write_cvalue` you need to add a `bint` instruction. - _ => unreachable!("write_cvalue_transmute: {:?} -> {:?}", src_ty, dst_ty), - }; + let data = if src_ty == dst_ty { data } else { codegen_bitcast(fx, dst_ty, data) }; //fx.bcx.set_val_label(data, cranelift_codegen::ir::ValueLabel::new(var.index())); fx.bcx.def_var(var, data); } @@ -591,13 +584,9 @@ impl<'tcx> CPlace<'tcx> { assert_eq!(self.layout().size, from.layout().size); if fx.clif_comments.enabled() { - use cranelift_codegen::cursor::{Cursor, CursorPosition}; - let cur_block = match fx.bcx.cursor().position() { - CursorPosition::After(block) => block, - _ => unreachable!(), - }; - fx.add_comment( - fx.bcx.func.layout.last_inst(cur_block).unwrap(), + let inst = fx.bcx.func.layout.last_inst(fx.bcx.current_block().unwrap()).unwrap(); + fx.add_post_comment( + inst, format!( "{}: {:?}: {:?} <- {:?}: {:?}", method, @@ -806,6 +795,35 @@ impl<'tcx> CPlace<'tcx> { } } + /// Write a value to an individual lane in a SIMD vector. 
+ pub(crate) fn write_lane_dyn(
+ self,
+ fx: &mut FunctionCx<'_, '_, 'tcx>,
+ lane_idx: Value,
+ value: CValue<'tcx>,
+ ) {
+ let layout = self.layout();
+ assert!(layout.ty.is_simd());
+ let (_lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+ let lane_layout = fx.layout_of(lane_ty);
+ assert_eq!(lane_layout, value.layout());
+
+ match self.inner {
+ CPlaceInner::Var(_, _) => unreachable!(),
+ CPlaceInner::VarPair(_, _, _) => unreachable!(),
+ CPlaceInner::Addr(ptr, None) => {
+ let lane_idx = clif_intcast(fx, lane_idx, fx.pointer_type, false);
+ let field_offset = fx
+ .bcx
+ .ins()
+ .imul_imm(lane_idx, i64::try_from(lane_layout.size.bytes()).unwrap());
+ let field_ptr = ptr.offset_value(fx, field_offset);
+ CPlace::for_ptr(field_ptr, lane_layout).write_cvalue(fx, value);
+ }
+ CPlaceInner::Addr(_, Some(_)) => unreachable!(),
+ }
+ }
+
 pub(crate) fn place_index(
 self,
 fx: &mut FunctionCx<'_, '_, 'tcx>,
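Both `write_lane_dyn` above and the matching `value_lane_dyn` read path lower a runtime lane index the same way: widen the index to pointer width, multiply by the lane size, and use the product as a byte offset from the vector's base address. The same arithmetic in plain Rust, as an illustrative sketch (helper name hypothetical):

```rust
use std::mem::size_of;

// Model of the lane addressing used above: the dynamic index scales by the
// lane size to produce a byte offset from the vector's base pointer.
unsafe fn write_lane<Lane>(base: *mut u8, lane_idx: u64, value: Lane) {
    let offset = lane_idx as usize * size_of::<Lane>(); // idx * lane_size bytes
    base.add(offset).cast::<Lane>().write(value);
}

fn main() {
    let mut v = [0u16; 8];
    unsafe { write_lane(v.as_mut_ptr().cast::<u8>(), 3, 0xABCD_u16) };
    assert_eq!(v[3], 0xABCD);
}
```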
