Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/mod.rs | 14
-rw-r--r--  compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs | 7
-rw-r--r--  compiler/rustc_codegen_cranelift/src/archive.rs | 10
-rw-r--r--  compiler/rustc_codegen_cranelift/src/base.rs | 75
-rw-r--r--  compiler/rustc_codegen_cranelift/src/common.rs | 10
-rw-r--r--  compiler/rustc_codegen_cranelift/src/constant.rs | 17
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs | 4
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs | 62
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/object.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs | 8
-rw-r--r--  compiler/rustc_codegen_cranelift/src/driver/aot.rs | 13
-rw-r--r--  compiler/rustc_codegen_cranelift/src/driver/jit.rs | 26
-rw-r--r--  compiler/rustc_codegen_cranelift/src/inline_asm.rs | 635
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs | 14
-rw-r--r--  compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs | 53
-rw-r--r--  compiler/rustc_codegen_cranelift/src/lib.rs | 34
-rw-r--r--  compiler/rustc_codegen_cranelift/src/metadata.rs | 76
-rw-r--r--  compiler/rustc_codegen_cranelift/src/pretty_clif.rs | 13
-rw-r--r--  compiler/rustc_codegen_cranelift/src/trap.rs | 2
-rw-r--r--  compiler/rustc_codegen_cranelift/src/value_and_place.rs | 62
20 files changed, 706 insertions, 431 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
index 78fdf9c02d0..72ebc84c1a3 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -18,11 +18,11 @@ pub(crate) use self::returning::codegen_return;
 
 fn clif_sig_from_fn_abi<'tcx>(
     tcx: TyCtxt<'tcx>,
-    triple: &target_lexicon::Triple,
+    default_call_conv: CallConv,
     fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
 ) -> Signature {
     let call_conv = match fn_abi.conv {
-        Conv::Rust | Conv::C => CallConv::triple_default(triple),
+        Conv::Rust | Conv::C => default_call_conv,
         Conv::X86_64SysV => CallConv::SystemV,
         Conv::X86_64Win64 => CallConv::WindowsFastcall,
         Conv::ArmAapcs
@@ -55,7 +55,7 @@ pub(crate) fn get_function_sig<'tcx>(
     assert!(!inst.substs.needs_infer());
     clif_sig_from_fn_abi(
         tcx,
-        triple,
+        CallConv::triple_default(triple),
         &RevealAllLayoutCx(tcx).fn_abi_of_instance(inst, ty::List::empty()),
     )
 }
@@ -91,7 +91,7 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
         returns: Vec<AbiParam>,
         args: &[Value],
     ) -> &[Value] {
-        let sig = Signature { params, returns, call_conv: CallConv::triple_default(self.triple()) };
+        let sig = Signature { params, returns, call_conv: self.target_config.default_call_conv };
         let func_id = self.module.declare_function(name, Linkage::Import, &sig).unwrap();
         let func_ref = self.module.declare_func_in_func(func_id, &mut self.bcx.func);
         let call_inst = self.bcx.ins().call(func_ref, args);
@@ -420,7 +420,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
             }
 
             let (ptr, method) = crate::vtable::get_ptr_and_method_ref(fx, args[0].value, idx);
-            let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
+            let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi);
             let sig = fx.bcx.import_signature(sig);
 
             (CallTarget::Indirect(sig, method), Some(ptr))
@@ -440,7 +440,7 @@ pub(crate) fn codegen_terminator_call<'tcx>(
             }
 
             let func = codegen_operand(fx, func).load_scalar(fx);
-            let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
+            let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi);
             let sig = fx.bcx.import_signature(sig);
 
             (CallTarget::Indirect(sig, func), None)
@@ -531,7 +531,7 @@ pub(crate) fn codegen_drop<'tcx>(
                 let fn_abi =
                     RevealAllLayoutCx(fx.tcx).fn_abi_of_instance(virtual_drop, ty::List::empty());
 
-                let sig = clif_sig_from_fn_abi(fx.tcx, fx.triple(), &fn_abi);
+                let sig = clif_sig_from_fn_abi(fx.tcx, fx.target_config.default_call_conv, &fn_abi);
                 let sig = fx.bcx.import_signature(sig);
                 fx.bcx.ins().call_indirect(sig, drop_fn, &[ptr]);
             }
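
The change above replaces the target-triple parameter with the default calling convention taken from the module's cached TargetFrontendConfig. A minimal sketch of that lookup, assuming the cranelift_codegen API used in this patch (default_call_conv is a plain field on TargetFrontendConfig; the helper name is illustrative):

    use cranelift_codegen::isa::{CallConv, TargetFrontendConfig};

    // Illustrative only, not part of the patch: no target_lexicon::Triple
    // lookup is needed because the cached config already carries the
    // triple's default calling convention.
    fn default_call_conv(config: &TargetFrontendConfig) -> CallConv {
        config.default_call_conv
    }
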
diff --git a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
index 2144e7ed67a..9f0bd31e95f 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/pass_mode.rs
@@ -71,7 +71,7 @@ fn cast_target_to_abi_params(cast: CastTarget) -> SmallVec<[AbiParam; 2]> {
         .prefix
         .iter()
         .flatten()
-        .map(|&kind| reg_to_abi_param(Reg { kind, size: cast.prefix_chunk_size }))
+        .map(|&reg| reg_to_abi_param(reg))
         .chain((0..rest_count).map(|_| reg_to_abi_param(cast.rest.unit)))
         .collect::<SmallVec<_>>();
 
@@ -117,7 +117,9 @@ impl<'tcx> ArgAbiExt<'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
             PassMode::Cast(cast) => cast_target_to_abi_params(cast),
             PassMode::Indirect { attrs, extra_attrs: None, on_stack } => {
                 if on_stack {
-                    let size = u32::try_from(self.layout.size.bytes()).unwrap();
+                    // The ABI requires aligning the struct size to the pointer size
+                    let size = self.layout.size.align_to(tcx.data_layout.pointer_align.abi);
+                    let size = u32::try_from(size.bytes()).unwrap();
                     smallvec![apply_arg_attrs_to_abi_param(
                         AbiParam::special(pointer_ty(tcx), ArgumentPurpose::StructArgument(size),),
                         attrs
@@ -204,7 +206,6 @@ pub(super) fn from_casted_value<'tcx>(
         // It may also be smaller for example when the type is a wrapper around an integer with a
         // larger alignment than the integer.
         size: (std::cmp::max(abi_param_size, layout_size) + 15) / 16 * 16,
-        offset: None,
     });
     let ptr = Pointer::new(fx.bcx.ins().stack_addr(pointer_ty(fx.tcx), stack_slot, 0));
     let mut offset = 0;
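
The on-stack (StructArgument) case above now pads the layout size up to the pointer alignment before converting it to a u32. A worked sketch of that rounding, using plain integer arithmetic equivalent to Size::align_to (illustrative, not rustc's Size type itself):

    // Round `size` up to the next multiple of `align` (a power of two in the
    // ABI case above, but the general arithmetic is shown here).
    fn align_to(size: u64, align: u64) -> u64 {
        (size + align - 1) / align * align
    }

    // e.g. a 12-byte struct passed on the stack of a 64-bit target:
    // align_to(12, 8) == 16, so StructArgument is declared with size 16.
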
diff --git a/compiler/rustc_codegen_cranelift/src/archive.rs b/compiler/rustc_codegen_cranelift/src/archive.rs
index 71f510c037f..b0eb3864d80 100644
--- a/compiler/rustc_codegen_cranelift/src/archive.rs
+++ b/compiler/rustc_codegen_cranelift/src/archive.rs
@@ -1,7 +1,6 @@
 //! Creation of ar archives like for the lib and staticlib crate type
 
 use std::collections::BTreeMap;
-use std::convert::TryFrom;
 use std::fs::File;
 use std::io::{self, Read, Seek};
 use std::path::{Path, PathBuf};
@@ -10,7 +9,7 @@ use rustc_codegen_ssa::back::archive::ArchiveBuilder;
 use rustc_session::Session;
 
 use object::read::archive::ArchiveFile;
-use object::{Object, ObjectSymbol, ReadCache, SymbolKind};
+use object::{Object, ObjectSymbol, ReadCache};
 
 #[derive(Debug)]
 enum ArchiveEntry {
@@ -150,12 +149,7 @@ impl<'a> ArchiveBuilder<'a> for ArArchiveBuilder<'a> {
                             object
                                 .symbols()
                                 .filter_map(|symbol| {
-                                    if symbol.is_undefined()
-                                        || symbol.is_local()
-                                        || symbol.kind() != SymbolKind::Data
-                                            && symbol.kind() != SymbolKind::Text
-                                            && symbol.kind() != SymbolKind::Tls
-                                    {
+                                    if symbol.is_undefined() || symbol.is_local() {
                                         None
                                     } else {
                                         symbol.name().map(|name| name.as_bytes().to_vec()).ok()
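
The symbol-table filter is relaxed above: every defined, non-local symbol is indexed, with no SymbolKind restriction. A self-contained sketch of the same filter using the object crate (function name and error handling are illustrative):

    use object::{Object, ObjectSymbol};

    // Collect the symbol names that would go into the archive symbol table.
    fn archive_symbols(data: &[u8]) -> Vec<Vec<u8>> {
        let file = object::File::parse(data).expect("valid object file");
        file.symbols()
            .filter_map(|symbol| {
                if symbol.is_undefined() || symbol.is_local() {
                    None
                } else {
                    symbol.name().map(|name| name.as_bytes().to_vec()).ok()
                }
            })
            .collect()
    }
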
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index 1b30edd2938..5a889734f21 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -1,6 +1,7 @@
 //! Codegen of a single function
 
 use cranelift_codegen::binemit::{NullStackMapSink, NullTrapSink};
+use rustc_ast::InlineAsmOptions;
 use rustc_index::vec::IndexVec;
 use rustc_middle::ty::adjustment::PointerCast;
 use rustc_middle::ty::layout::FnAbiOf;
@@ -48,13 +49,15 @@ pub(crate) fn codegen_fn<'tcx>(
         (0..mir.basic_blocks().len()).map(|_| bcx.create_block()).collect();
 
     // Make FunctionCx
-    let pointer_type = module.target_config().pointer_type();
+    let target_config = module.target_config();
+    let pointer_type = target_config.pointer_type();
     let clif_comments = crate::pretty_clif::CommentWriter::new(tcx, instance);
 
     let mut fx = FunctionCx {
         cx,
         module,
         tcx,
+        target_config,
         pointer_type,
         constants_cx: ConstantCx::new(),
 
@@ -71,8 +74,6 @@ pub(crate) fn codegen_fn<'tcx>(
         clif_comments,
         source_info_set: indexmap::IndexSet::new(),
         next_ssa_var: 0,
-
-        inline_asm_index: 0,
     };
 
     let arg_uninhabited = fx
@@ -203,7 +204,6 @@ pub(crate) fn verify_func(
                 tcx.sess.err(&format!("{:?}", err));
                 let pretty_error = cranelift_codegen::print_errors::pretty_verifier_error(
                     &func,
-                    None,
                     Some(Box::new(writer)),
                     err,
                 );
@@ -239,7 +239,8 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, '_>) {
             fx.add_comment(inst, terminator_head);
         }
 
-        fx.set_debug_loc(bb_data.terminator().source_info);
+        let source_info = bb_data.terminator().source_info;
+        fx.set_debug_loc(source_info);
 
         match &bb_data.terminator().kind {
             TerminatorKind::Goto { target } => {
@@ -294,20 +295,18 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, '_>) {
                     AssertKind::BoundsCheck { ref len, ref index } => {
                         let len = codegen_operand(fx, len).load_scalar(fx);
                         let index = codegen_operand(fx, index).load_scalar(fx);
-                        let location = fx
-                            .get_caller_location(bb_data.terminator().source_info.span)
-                            .load_scalar(fx);
+                        let location = fx.get_caller_location(source_info.span).load_scalar(fx);
 
                         codegen_panic_inner(
                             fx,
                             rustc_hir::LangItem::PanicBoundsCheck,
                             &[index, len, location],
-                            bb_data.terminator().source_info.span,
+                            source_info.span,
                         );
                     }
                     _ => {
                         let msg_str = msg.description();
-                        codegen_panic(fx, msg_str, bb_data.terminator().source_info.span);
+                        codegen_panic(fx, msg_str, source_info.span);
                     }
                 }
             }
@@ -378,10 +377,18 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, '_>) {
                 options,
                 destination,
                 line_spans: _,
+                cleanup: _,
             } => {
+                if options.contains(InlineAsmOptions::MAY_UNWIND) {
+                    fx.tcx.sess.span_fatal(
+                        source_info.span,
+                        "cranelift doesn't support unwinding from inline assembly.",
+                    );
+                }
+
                 crate::inline_asm::codegen_inline_asm(
                     fx,
-                    bb_data.terminator().source_info.span,
+                    source_info.span,
                     template,
                     operands,
                     *options,
@@ -415,7 +422,7 @@ fn codegen_fn_content(fx: &mut FunctionCx<'_, '_, '_>) {
             }
             TerminatorKind::Drop { place, target, unwind: _ } => {
                 let drop_place = codegen_place(fx, *place);
-                crate::abi::codegen_drop(fx, bb_data.terminator().source_info.span, drop_place);
+                crate::abi::codegen_drop(fx, source_info.span, drop_place);
 
                 let target_block = fx.get_block(*target);
                 fx.bcx.ins().jump(target_block, &[]);
@@ -671,7 +678,7 @@ fn codegen_stmt<'tcx>(
                         // FIXME use emit_small_memset where possible
                         let addr = lval.to_ptr().get_addr(fx);
                         let val = operand.load_scalar(fx);
-                        fx.bcx.call_memset(fx.module.target_config(), addr, val, times);
+                        fx.bcx.call_memset(fx.target_config, addr, val, times);
                     } else {
                         let loop_block = fx.bcx.create_block();
                         let loop_block2 = fx.bcx.create_block();
@@ -708,30 +715,6 @@ fn codegen_stmt<'tcx>(
                     let operand = operand.load_scalar(fx);
                     lval.write_cvalue(fx, CValue::by_val(operand, box_layout));
                 }
-                Rvalue::NullaryOp(NullOp::Box, content_ty) => {
-                    let usize_type = fx.clif_type(fx.tcx.types.usize).unwrap();
-                    let content_ty = fx.monomorphize(content_ty);
-                    let layout = fx.layout_of(content_ty);
-                    let llsize = fx.bcx.ins().iconst(usize_type, layout.size.bytes() as i64);
-                    let llalign = fx.bcx.ins().iconst(usize_type, layout.align.abi.bytes() as i64);
-                    let box_layout = fx.layout_of(fx.tcx.mk_box(content_ty));
-
-                    // Allocate space:
-                    let def_id =
-                        match fx.tcx.lang_items().require(rustc_hir::LangItem::ExchangeMalloc) {
-                            Ok(id) => id,
-                            Err(s) => {
-                                fx.tcx
-                                    .sess
-                                    .fatal(&format!("allocation of `{}` {}", box_layout.ty, s));
-                            }
-                        };
-                    let instance = ty::Instance::mono(fx.tcx, def_id).polymorphize(fx.tcx);
-                    let func_ref = fx.get_function_ref(instance);
-                    let call = fx.bcx.ins().call(func_ref, &[llsize, llalign]);
-                    let ptr = fx.bcx.inst_results(call)[0];
-                    lval.write_cvalue(fx, CValue::by_val(ptr, box_layout));
-                }
                 Rvalue::NullaryOp(null_op, ty) => {
                     assert!(
                         lval.layout()
@@ -742,10 +725,8 @@ fn codegen_stmt<'tcx>(
                     let val = match null_op {
                         NullOp::SizeOf => layout.size.bytes(),
                         NullOp::AlignOf => layout.align.abi.bytes(),
-                        NullOp::Box => unreachable!(),
                     };
-                    let val =
-                        CValue::const_val(fx, fx.layout_of(fx.tcx.types.usize), val.into());
+                    let val = CValue::const_val(fx, fx.layout_of(fx.tcx.types.usize), val.into());
                     lval.write_cvalue(fx, val);
                 }
                 Rvalue::Aggregate(ref kind, ref operands) => match kind.as_ref() {
@@ -768,18 +749,6 @@ fn codegen_stmt<'tcx>(
         | StatementKind::Retag { .. }
         | StatementKind::AscribeUserType(..) => {}
 
-        StatementKind::LlvmInlineAsm(asm) => {
-            match asm.asm.asm.as_str().trim() {
-                "" => {
-                    // Black box
-                }
-                _ => fx.tcx.sess.span_fatal(
-                    stmt.source_info.span,
-                    "Legacy `llvm_asm!` inline assembly is not supported. \
-                    Try using the new `asm!` instead.",
-                ),
-            }
-        }
         StatementKind::Coverage { .. } => fx.tcx.sess.fatal("-Zcoverage is unimplemented"),
         StatementKind::CopyNonOverlapping(inner) => {
             let dst = codegen_operand(fx, &inner.dst);
@@ -793,7 +762,7 @@ fn codegen_stmt<'tcx>(
             let elem_size: u64 = pointee.size.bytes();
             let bytes =
                 if elem_size != 1 { fx.bcx.ins().imul_imm(count, elem_size as i64) } else { count };
-            fx.bcx.call_memcpy(fx.module.target_config(), dst, src, bytes);
+            fx.bcx.call_memcpy(fx.target_config, dst, src, bytes);
         }
     }
 }
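
With Rvalue::NullaryOp(NullOp::Box, ..) and StatementKind::LlvmInlineAsm removed above, the remaining nullary ops only fold to usize constants. At the source level they correspond to mem::size_of and mem::align_of; a tiny illustration (values shown for x86_64):

    use std::mem;

    // NullOp::SizeOf / NullOp::AlignOf lower to plain usize constants,
    // exactly like these calls do after const-folding.
    fn null_op_constants() -> (usize, usize) {
        (mem::size_of::<u64>(), mem::align_of::<u64>()) // (8, 8) on x86_64
    }
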
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
index 0e84681d9ad..3b6025c73d1 100644
--- a/compiler/rustc_codegen_cranelift/src/common.rs
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -1,3 +1,4 @@
+use cranelift_codegen::isa::TargetFrontendConfig;
 use rustc_index::vec::IndexVec;
 use rustc_middle::ty::layout::{
     FnAbiError, FnAbiOfHelpers, FnAbiRequest, LayoutError, LayoutOfHelpers,
@@ -235,7 +236,8 @@ pub(crate) struct FunctionCx<'m, 'clif, 'tcx: 'm> {
     pub(crate) cx: &'clif mut crate::CodegenCx<'tcx>,
     pub(crate) module: &'m mut dyn Module,
     pub(crate) tcx: TyCtxt<'tcx>,
-    pub(crate) pointer_type: Type, // Cached from module
+    pub(crate) target_config: TargetFrontendConfig, // Cached from module
+    pub(crate) pointer_type: Type,                  // Cached from module
     pub(crate) constants_cx: ConstantCx,
 
     pub(crate) instance: Instance<'tcx>,
@@ -255,8 +257,6 @@ pub(crate) struct FunctionCx<'m, 'clif, 'tcx: 'm> {
 
     /// This should only be accessed by `CPlace::new_var`.
     pub(crate) next_ssa_var: u32,
-
-    pub(crate) inline_asm_index: u32,
 }
 
 impl<'tcx> LayoutOfHelpers<'tcx> for FunctionCx<'_, '_, 'tcx> {
@@ -359,10 +359,6 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
         crate::constant::codegen_const_value(self, const_loc, self.tcx.caller_location_ty())
     }
 
-    pub(crate) fn triple(&self) -> &target_lexicon::Triple {
-        self.module.isa().triple()
-    }
-
     pub(crate) fn anonymous_str(&mut self, msg: &str) -> Value {
         let mut data_ctx = DataContext::new();
         data_ctx.define(msg.as_bytes().to_vec().into_boxed_slice());
diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs
index 5c4991f1fb6..74571817969 100644
--- a/compiler/rustc_codegen_cranelift/src/constant.rs
+++ b/compiler/rustc_codegen_cranelift/src/constant.rs
@@ -129,11 +129,13 @@ pub(crate) fn codegen_constant<'tcx>(
     };
     let const_val = match const_.val {
         ConstKind::Value(const_val) => const_val,
-        ConstKind::Unevaluated(uv) if fx.tcx.is_static(uv.def.did) => {
-            assert!(uv.substs(fx.tcx).is_empty());
-            assert!(uv.promoted.is_none());
+        ConstKind::Unevaluated(ty::Unevaluated { def, substs, promoted })
+            if fx.tcx.is_static(def.did) =>
+        {
+            assert!(substs.is_empty());
+            assert!(promoted.is_none());
 
-            return codegen_static_ref(fx, uv.def.did, fx.layout_of(const_.ty)).to_cvalue(fx);
+            return codegen_static_ref(fx, def.did, fx.layout_of(const_.ty)).to_cvalue(fx);
         }
         ConstKind::Unevaluated(unevaluated) => {
             match fx.tcx.const_eval_resolve(ParamEnv::reveal_all(), unevaluated, None) {
@@ -369,7 +371,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
             TodoItem::Static(def_id) => {
                 //println!("static {:?}", def_id);
 
-                let section_name = tcx.codegen_fn_attrs(def_id).link_section.map(|s| s.as_str());
+                let section_name = tcx.codegen_fn_attrs(def_id).link_section;
 
                 let alloc = tcx.eval_static_initializer(def_id).unwrap();
 
@@ -388,6 +390,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
 
         if let Some(section_name) = section_name {
             let (segment_name, section_name) = if tcx.sess.target.is_like_osx {
+                let section_name = section_name.as_str();
                 if let Some(names) = section_name.split_once(',') {
                     names
                 } else {
@@ -397,7 +400,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
                     ));
                 }
             } else {
-                ("", &*section_name)
+                ("", section_name.as_str())
             };
             data_ctx.set_segment_section(segment_name, section_name);
         }
@@ -505,7 +508,7 @@ pub(crate) fn mir_operand_get_const_val<'tcx>(
                         {
                             return None;
                         }
-                        StatementKind::LlvmInlineAsm(_) | StatementKind::CopyNonOverlapping(_) => {
+                        StatementKind::CopyNonOverlapping(_) => {
                             return None;
                         } // conservative handling
                         StatementKind::Assign(_)
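
The link_section handling above keeps the Symbol and converts to &str only at the use sites; on Mach-O-like targets the attribute must name both a segment and a section. A small standalone sketch of that split (function and section names are illustrative):

    // "__DATA,__mydata" -> ("__DATA", "__mydata") on macOS-like targets;
    // elsewhere the whole string is the section name with an empty segment.
    fn segment_and_section(is_like_osx: bool, section_name: &str) -> Option<(&str, &str)> {
        if is_like_osx {
            section_name.split_once(',')
        } else {
            Some(("", section_name))
        }
    }
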
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs
index c8c2d50b034..589910ede96 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/emit.rs
@@ -67,10 +67,8 @@ impl WriterRelocate {
     }
 
     /// Perform the collected relocations to be usable for JIT usage.
-    #[cfg(feature = "jit")]
+    #[cfg(all(feature = "jit", not(windows)))]
     pub(super) fn relocate_for_jit(mut self, jit_module: &cranelift_jit::JITModule) -> Vec<u8> {
-        use std::convert::TryInto;
-
         for reloc in self.relocs.drain(..) {
             match reloc.name {
                 super::DebugRelocName::Section(_) => unreachable!(),
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
index 6d172817cb1..8e203b8cfa0 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/mod.rs
@@ -10,7 +10,7 @@ use crate::prelude::*;
 use rustc_index::vec::IndexVec;
 
 use cranelift_codegen::entity::EntityRef;
-use cranelift_codegen::ir::{LabelValueLoc, StackSlots, ValueLabel, ValueLoc};
+use cranelift_codegen::ir::{Endianness, LabelValueLoc, ValueLabel};
 use cranelift_codegen::isa::TargetIsa;
 use cranelift_codegen::ValueLocRange;
 
@@ -23,15 +23,6 @@ use gimli::{Encoding, Format, LineEncoding, RunTimeEndian, X86_64};
 pub(crate) use emit::{DebugReloc, DebugRelocName};
 pub(crate) use unwind::UnwindContext;
 
-fn target_endian(tcx: TyCtxt<'_>) -> RunTimeEndian {
-    use rustc_target::abi::Endian;
-
-    match tcx.data_layout.endian {
-        Endian::Big => RunTimeEndian::Big,
-        Endian::Little => RunTimeEndian::Little,
-    }
-}
-
 pub(crate) struct DebugContext<'tcx> {
     tcx: TyCtxt<'tcx>,
 
@@ -60,6 +51,11 @@ impl<'tcx> DebugContext<'tcx> {
             address_size: isa.frontend_config().pointer_bytes(),
         };
 
+        let endian = match isa.endianness() {
+            Endianness::Little => RunTimeEndian::Little,
+            Endianness::Big => RunTimeEndian::Big,
+        };
+
         let mut dwarf = DwarfUnit::new(encoding);
 
         let producer = format!(
@@ -67,7 +63,12 @@ impl<'tcx> DebugContext<'tcx> {
             rustc_interface::util::version_str().unwrap_or("unknown version"),
             cranelift_codegen::VERSION,
         );
-        let comp_dir = tcx.sess.opts.working_dir.to_string_lossy(FileNameDisplayPreference::Remapped).into_owned();
+        let comp_dir = tcx
+            .sess
+            .opts
+            .working_dir
+            .to_string_lossy(FileNameDisplayPreference::Remapped)
+            .into_owned();
         let (name, file_info) = match tcx.sess.local_crate_source_file.clone() {
             Some(path) => {
                 let name = path.to_string_lossy().into_owned();
@@ -103,7 +104,7 @@ impl<'tcx> DebugContext<'tcx> {
         DebugContext {
             tcx,
 
-            endian: target_endian(tcx),
+            endian,
 
             dwarf,
             unit_range_list: RangeList(Vec::new()),
@@ -173,7 +174,7 @@ impl<'tcx> DebugContext<'tcx> {
 
                     field_entry.set(
                         gimli::DW_AT_name,
-                        AttributeValue::String(field_def.ident.as_str().to_string().into_bytes()),
+                        AttributeValue::String(field_def.name.as_str().to_string().into_bytes()),
                     );
                     field_entry.set(
                         gimli::DW_AT_data_member_location,
@@ -250,7 +251,7 @@ impl<'tcx> DebugContext<'tcx> {
 
         // FIXME make it more reliable and implement scopes before re-enabling this.
         if false {
-            let value_labels_ranges = context.build_value_labels_ranges(isa).unwrap();
+            let value_labels_ranges = std::collections::HashMap::new(); // FIXME
 
             for (local, _local_decl) in mir.local_decls.iter_enumerated() {
                 let ty = self.tcx.subst_and_normalize_erasing_regions(
@@ -264,7 +265,6 @@ impl<'tcx> DebugContext<'tcx> {
                     self,
                     isa,
                     symbol,
-                    context,
                     &local_map,
                     &value_labels_ranges,
                     Place { local, projection: ty::List::empty() },
@@ -283,7 +283,6 @@ fn place_location<'tcx>(
     debug_context: &mut DebugContext<'tcx>,
     isa: &dyn TargetIsa,
     symbol: usize,
-    context: &Context,
     local_map: &IndexVec<mir::Local, CPlace<'tcx>>,
     #[allow(rustc::default_hash_types)] value_labels_ranges: &std::collections::HashMap<
         ValueLabel,
@@ -306,12 +305,7 @@ fn place_location<'tcx>(
                                 addend: i64::from(value_loc_range.start),
                             },
                             end: Address::Symbol { symbol, addend: i64::from(value_loc_range.end) },
-                            data: translate_loc(
-                                isa,
-                                value_loc_range.loc,
-                                &context.func.stack_slots,
-                            )
-                            .unwrap(),
+                            data: translate_loc(isa, value_loc_range.loc).unwrap(),
                         })
                         .collect(),
                 );
@@ -340,34 +334,14 @@ fn place_location<'tcx>(
             AttributeValue::Exprloc(Expression::new())
 
             // For PointerBase::Stack:
-            //AttributeValue::Exprloc(translate_loc(ValueLoc::Stack(*stack_slot), &context.func.stack_slots).unwrap())
+            //AttributeValue::Exprloc(translate_loc(ValueLoc::Stack(*stack_slot)).unwrap())
         }
     }
 }
 
 // Adapted from https://github.com/CraneStation/wasmtime/blob/5a1845b4caf7a5dba8eda1fef05213a532ed4259/crates/debug/src/transform/expression.rs#L59-L137
-fn translate_loc(
-    isa: &dyn TargetIsa,
-    loc: LabelValueLoc,
-    stack_slots: &StackSlots,
-) -> Option<Expression> {
+fn translate_loc(isa: &dyn TargetIsa, loc: LabelValueLoc) -> Option<Expression> {
     match loc {
-        LabelValueLoc::ValueLoc(ValueLoc::Reg(reg)) => {
-            let machine_reg = isa.map_dwarf_register(reg).unwrap();
-            let mut expr = Expression::new();
-            expr.op_reg(gimli::Register(machine_reg));
-            Some(expr)
-        }
-        LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => {
-            if let Some(ss_offset) = stack_slots[ss].offset {
-                let mut expr = Expression::new();
-                expr.op_breg(X86_64::RBP, i64::from(ss_offset) + 16);
-                Some(expr)
-            } else {
-                None
-            }
-        }
-        LabelValueLoc::ValueLoc(ValueLoc::Unassigned) => unreachable!(),
         LabelValueLoc::Reg(reg) => {
             let machine_reg = isa.map_regalloc_reg_to_dwarf(reg).unwrap();
             let mut expr = Expression::new();
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs
index 9984dc92c44..9dc9b2cf9f8 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/object.rs
@@ -1,5 +1,3 @@
-use std::convert::{TryFrom, TryInto};
-
 use rustc_data_structures::fx::FxHashMap;
 
 use cranelift_module::FuncId;
diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs
index f0896ea0e16..e4f28338096 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/unwind.rs
@@ -2,6 +2,7 @@
 
 use crate::prelude::*;
 
+use cranelift_codegen::ir::Endianness;
 use cranelift_codegen::isa::{unwind::UnwindInfo, TargetIsa};
 
 use cranelift_object::ObjectProduct;
@@ -17,8 +18,11 @@ pub(crate) struct UnwindContext {
 }
 
 impl UnwindContext {
-    pub(crate) fn new(tcx: TyCtxt<'_>, isa: &dyn TargetIsa, pic_eh_frame: bool) -> Self {
-        let endian = super::target_endian(tcx);
+    pub(crate) fn new(isa: &dyn TargetIsa, pic_eh_frame: bool) -> Self {
+        let endian = match isa.endianness() {
+            Endianness::Little => RunTimeEndian::Little,
+            Endianness::Big => RunTimeEndian::Big,
+        };
         let mut frame_table = FrameTable::default();
 
         let cie_id = if let Some(mut cie) = isa.create_systemv_cie() {
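
Both DebugContext::new and UnwindContext::new now derive DWARF endianness from the TargetIsa rather than from tcx.data_layout, which is what allowed target_endian to be deleted. A hypothetical shared helper capturing that mapping (not part of the patch, but built only from calls the patch itself uses):

    use cranelift_codegen::ir::Endianness;
    use cranelift_codegen::isa::TargetIsa;
    use gimli::RunTimeEndian;

    fn runtime_endian(isa: &dyn TargetIsa) -> RunTimeEndian {
        match isa.endianness() {
            Endianness::Little => RunTimeEndian::Little,
            Endianness::Big => RunTimeEndian::Big,
        }
    }
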
diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
index 0a8d6122aa7..046e4393a68 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -4,6 +4,7 @@
 use std::path::PathBuf;
 
 use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
+use rustc_codegen_ssa::back::metadata::create_compressed_metadata_file;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, ModuleKind};
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_metadata::EncodedMetadata;
@@ -84,7 +85,7 @@ fn reuse_workproduct_for_cgu(
     let work_product = cgu.work_product(tcx);
     if let Some(saved_file) = &work_product.saved_file {
         let obj_out =
-            tcx.output_filenames(()).temp_path(OutputType::Object, Some(&cgu.name().as_str()));
+            tcx.output_filenames(()).temp_path(OutputType::Object, Some(cgu.name().as_str()));
         object = Some(obj_out.clone());
         let source_file = rustc_incremental::in_incr_comp_dir_sess(&tcx.sess, &saved_file);
         if let Err(err) = rustc_fs_util::link_or_copy(&source_file, &obj_out) {
@@ -123,6 +124,7 @@ fn module_codegen(
         backend_config.clone(),
         module.isa(),
         tcx.sess.opts.debuginfo != DebugInfo::None,
+        cgu_name,
     );
     super::predefine_mono_items(tcx, &mut module, &mono_items);
     for (mono_item, _) in mono_items {
@@ -176,7 +178,7 @@ fn module_codegen(
         )
     });
 
-    codegen_global_asm(tcx, &cgu.name().as_str(), &cx.global_asm);
+    codegen_global_asm(tcx, cgu.name().as_str(), &cx.global_asm);
 
     codegen_result
 }
@@ -207,7 +209,7 @@ pub(crate) fn run_aot(
         cgus.iter()
             .map(|cgu| {
                 let cgu_reuse = determine_cgu_reuse(tcx, cgu);
-                tcx.sess.cgu_reuse_tracker.set_actual_reuse(&cgu.name().as_str(), cgu_reuse);
+                tcx.sess.cgu_reuse_tracker.set_actual_reuse(cgu.name().as_str(), cgu_reuse);
 
                 match cgu_reuse {
                     _ if backend_config.disable_incr_cache => {}
@@ -241,7 +243,7 @@ pub(crate) fn run_aot(
     let isa = crate::build_isa(tcx.sess, &backend_config);
     let mut allocator_module = make_module(tcx.sess, isa, "allocator_shim".to_string());
     assert_eq!(pointer_ty(tcx), allocator_module.target_config().pointer_type());
-    let mut allocator_unwind_context = UnwindContext::new(tcx, allocator_module.isa(), true);
+    let mut allocator_unwind_context = UnwindContext::new(allocator_module.isa(), true);
     let created_alloc_shim =
         crate::allocator::codegen(tcx, &mut allocator_module, &mut allocator_unwind_context);
 
@@ -277,7 +279,8 @@ pub(crate) fn run_aot(
             let tmp_file =
                 tcx.output_filenames(()).temp_path(OutputType::Metadata, Some(&metadata_cgu_name));
 
-            let obj = crate::metadata::new_metadata_object(tcx, &metadata_cgu_name, &metadata);
+            let symbol_name = rustc_middle::middle::exported_symbols::metadata_symbol_name(tcx);
+            let obj = create_compressed_metadata_file(tcx.sess, &metadata, &symbol_name);
 
             if let Err(err) = std::fs::write(&tmp_file, obj) {
                 tcx.sess.fatal(&format!("error writing metadata object file: {}", err));
diff --git a/compiler/rustc_codegen_cranelift/src/driver/jit.rs b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
index 76fbc9ad51e..309d27090b5 100644
--- a/compiler/rustc_codegen_cranelift/src/driver/jit.rs
+++ b/compiler/rustc_codegen_cranelift/src/driver/jit.rs
@@ -3,7 +3,7 @@
 
 use std::cell::RefCell;
 use std::ffi::CString;
-use std::lazy::{Lazy, SyncOnceCell};
+use std::lazy::SyncOnceCell;
 use std::os::raw::{c_char, c_int};
 use std::sync::{mpsc, Mutex};
 
@@ -11,6 +11,7 @@ use cranelift_codegen::binemit::{NullStackMapSink, NullTrapSink};
 use rustc_codegen_ssa::CrateInfo;
 use rustc_middle::mir::mono::MonoItem;
 use rustc_session::Session;
+use rustc_span::Symbol;
 
 use cranelift_jit::{JITBuilder, JITModule};
 
@@ -23,7 +24,7 @@ struct JitState {
 }
 
 thread_local! {
-    static LAZY_JIT_STATE: RefCell<Option<JitState>> = RefCell::new(None);
+    static LAZY_JIT_STATE: RefCell<Option<JitState>> = const { RefCell::new(None) };
 }
 
 /// The Sender owned by the rustc thread
@@ -50,12 +51,11 @@ impl UnsafeMessage {
     fn send(self) -> Result<(), mpsc::SendError<UnsafeMessage>> {
         thread_local! {
             /// The Sender owned by the local thread
-            static LOCAL_MESSAGE_SENDER: Lazy<mpsc::Sender<UnsafeMessage>> = Lazy::new(||
+            static LOCAL_MESSAGE_SENDER: mpsc::Sender<UnsafeMessage> =
                 GLOBAL_MESSAGE_SENDER
                     .get().unwrap()
                     .lock().unwrap()
-                    .clone()
-            );
+                    .clone();
         }
         LOCAL_MESSAGE_SENDER.with(|sender| sender.send(self))
     }
@@ -76,7 +76,13 @@ fn create_jit_module<'tcx>(
     jit_builder.symbols(imported_symbols);
     let mut jit_module = JITModule::new(jit_builder);
 
-    let mut cx = crate::CodegenCx::new(tcx, backend_config.clone(), jit_module.isa(), false);
+    let mut cx = crate::CodegenCx::new(
+        tcx,
+        backend_config.clone(),
+        jit_module.isa(),
+        false,
+        Symbol::intern("dummy_cgu_name"),
+    );
 
     crate::allocator::codegen(tcx, &mut jit_module, &mut cx.unwind_context);
     crate::main_shim::maybe_create_entry_wrapper(
@@ -246,7 +252,13 @@ fn jit_fn(instance_ptr: *const Instance<'static>, trampoline_ptr: *const u8) ->
 
             jit_module.prepare_for_function_redefine(func_id).unwrap();
 
-            let mut cx = crate::CodegenCx::new(tcx, backend_config, jit_module.isa(), false);
+            let mut cx = crate::CodegenCx::new(
+                tcx,
+                backend_config,
+                jit_module.isa(),
+                false,
+                Symbol::intern("dummy_cgu_name"),
+            );
             tcx.sess.time("codegen fn", || crate::base::codegen_fn(&mut cx, jit_module, instance));
 
             assert!(cx.global_asm.is_empty());
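
The two thread_local! changes above switch to const initializers, which avoid the lazy-initialization check on each access. A minimal standalone example of the same pattern (names are illustrative):

    use std::cell::RefCell;

    thread_local! {
        // Const-initialized TLS: no lazy-init flag is checked on access.
        static COUNTER: RefCell<u32> = const { RefCell::new(0) };
    }

    fn bump() -> u32 {
        COUNTER.with(|c| {
            *c.borrow_mut() += 1;
            *c.borrow()
        })
    }
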
diff --git a/compiler/rustc_codegen_cranelift/src/inline_asm.rs b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
index 09c5e6031c7..c242c75ed18 100644
--- a/compiler/rustc_codegen_cranelift/src/inline_asm.rs
+++ b/compiler/rustc_codegen_cranelift/src/inline_asm.rs
@@ -1,4 +1,4 @@
-//! Codegen of [`asm!`] invocations.
+//! Codegen of `asm!` invocations.
 
 use crate::prelude::*;
 
@@ -6,6 +6,7 @@ use std::fmt::Write;
 
 use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
 use rustc_middle::mir::InlineAsmOperand;
+use rustc_span::sym;
 use rustc_target::asm::*;
 
 pub(crate) fn codegen_inline_asm<'tcx>(
@@ -17,10 +18,7 @@ pub(crate) fn codegen_inline_asm<'tcx>(
 ) {
     // FIXME add .eh_frame unwind info directives
 
-    if template.is_empty() {
-        // Black box
-        return;
-    } else if template[0] == InlineAsmTemplatePiece::String("int $$0x29".to_string()) {
+    if template[0] == InlineAsmTemplatePiece::String("int $$0x29".to_string()) {
         let true_ = fx.bcx.ins().iconst(types::I32, 1);
         fx.bcx.ins().trapnz(true_, TrapCode::User(1));
         return;
@@ -41,8 +39,10 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         assert_eq!(operands.len(), 4);
         let (leaf, eax_place) = match operands[1] {
             InlineAsmOperand::InOut { reg, late: true, ref in_value, out_place } => {
-                let reg = expect_reg(reg);
-                assert_eq!(reg, InlineAsmReg::X86(X86InlineAsmReg::ax));
+                assert_eq!(
+                    reg,
+                    InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax))
+                );
                 (
                     crate::base::codegen_operand(fx, in_value).load_scalar(fx),
                     crate::base::codegen_place(fx, out_place.unwrap()),
@@ -64,8 +64,10 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         };
         let (sub_leaf, ecx_place) = match operands[2] {
             InlineAsmOperand::InOut { reg, late: true, ref in_value, out_place } => {
-                let reg = expect_reg(reg);
-                assert_eq!(reg, InlineAsmReg::X86(X86InlineAsmReg::cx));
+                assert_eq!(
+                    reg,
+                    InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx))
+                );
                 (
                     crate::base::codegen_operand(fx, in_value).load_scalar(fx),
                     crate::base::codegen_place(fx, out_place.unwrap()),
@@ -75,8 +77,10 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         };
         let edx_place = match operands[3] {
             InlineAsmOperand::Out { reg, late: true, place } => {
-                let reg = expect_reg(reg);
-                assert_eq!(reg, InlineAsmReg::X86(X86InlineAsmReg::dx));
+                assert_eq!(
+                    reg,
+                    InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx))
+                );
                 crate::base::codegen_place(fx, place.unwrap())
             }
             _ => unreachable!(),
@@ -96,60 +100,59 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         crate::trap::trap_unimplemented(fx, "Alloca is not supported");
     }
 
-    let mut slot_size = Size::from_bytes(0);
-    let mut clobbered_regs = Vec::new();
     let mut inputs = Vec::new();
     let mut outputs = Vec::new();
 
-    let mut new_slot = |reg_class: InlineAsmRegClass| {
-        let reg_size = reg_class
-            .supported_types(InlineAsmArch::X86_64)
-            .iter()
-            .map(|(ty, _)| ty.size())
-            .max()
-            .unwrap();
-        let align = rustc_target::abi::Align::from_bytes(reg_size.bytes()).unwrap();
-        slot_size = slot_size.align_to(align);
-        let offset = slot_size;
-        slot_size += reg_size;
-        offset
+    let mut asm_gen = InlineAssemblyGenerator {
+        tcx: fx.tcx,
+        arch: fx.tcx.sess.asm_arch.unwrap(),
+        template,
+        operands,
+        options,
+        registers: Vec::new(),
+        stack_slots_clobber: Vec::new(),
+        stack_slots_input: Vec::new(),
+        stack_slots_output: Vec::new(),
+        stack_slot_size: Size::from_bytes(0),
     };
+    asm_gen.allocate_registers();
+    asm_gen.allocate_stack_slots();
+
+    let inline_asm_index = fx.cx.inline_asm_index.get();
+    fx.cx.inline_asm_index.set(inline_asm_index + 1);
+    let asm_name = format!(
+        "__inline_asm_{}_n{}",
+        fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
+        inline_asm_index
+    );
+
+    let generated_asm = asm_gen.generate_asm_wrapper(&asm_name);
+    fx.cx.global_asm.push_str(&generated_asm);
 
-    // FIXME overlap input and output slots to save stack space
-    for operand in operands {
+    for (i, operand) in operands.iter().enumerate() {
         match *operand {
-            InlineAsmOperand::In { reg, ref value } => {
-                let reg = expect_reg(reg);
-                clobbered_regs.push((reg, new_slot(reg.reg_class())));
+            InlineAsmOperand::In { reg: _, ref value } => {
                 inputs.push((
-                    reg,
-                    new_slot(reg.reg_class()),
+                    asm_gen.stack_slots_input[i].unwrap(),
                     crate::base::codegen_operand(fx, value).load_scalar(fx),
                 ));
             }
-            InlineAsmOperand::Out { reg, late: _, place } => {
-                let reg = expect_reg(reg);
-                clobbered_regs.push((reg, new_slot(reg.reg_class())));
+            InlineAsmOperand::Out { reg: _, late: _, place } => {
                 if let Some(place) = place {
                     outputs.push((
-                        reg,
-                        new_slot(reg.reg_class()),
+                        asm_gen.stack_slots_output[i].unwrap(),
                         crate::base::codegen_place(fx, place),
                     ));
                 }
             }
-            InlineAsmOperand::InOut { reg, late: _, ref in_value, out_place } => {
-                let reg = expect_reg(reg);
-                clobbered_regs.push((reg, new_slot(reg.reg_class())));
+            InlineAsmOperand::InOut { reg: _, late: _, ref in_value, out_place } => {
                 inputs.push((
-                    reg,
-                    new_slot(reg.reg_class()),
+                    asm_gen.stack_slots_input[i].unwrap(),
                     crate::base::codegen_operand(fx, in_value).load_scalar(fx),
                 ));
                 if let Some(out_place) = out_place {
                     outputs.push((
-                        reg,
-                        new_slot(reg.reg_class()),
+                        asm_gen.stack_slots_output[i].unwrap(),
                         crate::base::codegen_place(fx, out_place),
                     ));
                 }
@@ -160,110 +163,470 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         }
     }
 
-    let inline_asm_index = fx.inline_asm_index;
-    fx.inline_asm_index += 1;
-    let asm_name = format!("{}__inline_asm_{}", fx.symbol_name, inline_asm_index);
-
-    let generated_asm = generate_asm_wrapper(
-        &asm_name,
-        InlineAsmArch::X86_64,
-        options,
-        template,
-        clobbered_regs,
-        &inputs,
-        &outputs,
-    );
-    fx.cx.global_asm.push_str(&generated_asm);
-
-    call_inline_asm(fx, &asm_name, slot_size, inputs, outputs);
+    call_inline_asm(fx, &asm_name, asm_gen.stack_slot_size, inputs, outputs);
 }
 
-fn generate_asm_wrapper(
-    asm_name: &str,
+struct InlineAssemblyGenerator<'a, 'tcx> {
+    tcx: TyCtxt<'tcx>,
     arch: InlineAsmArch,
+    template: &'a [InlineAsmTemplatePiece],
+    operands: &'a [InlineAsmOperand<'tcx>],
     options: InlineAsmOptions,
-    template: &[InlineAsmTemplatePiece],
-    clobbered_regs: Vec<(InlineAsmReg, Size)>,
-    inputs: &[(InlineAsmReg, Size, Value)],
-    outputs: &[(InlineAsmReg, Size, CPlace<'_>)],
-) -> String {
-    let mut generated_asm = String::new();
-    writeln!(generated_asm, ".globl {}", asm_name).unwrap();
-    writeln!(generated_asm, ".type {},@function", asm_name).unwrap();
-    writeln!(generated_asm, ".section .text.{},\"ax\",@progbits", asm_name).unwrap();
-    writeln!(generated_asm, "{}:", asm_name).unwrap();
-
-    generated_asm.push_str(".intel_syntax noprefix\n");
-    generated_asm.push_str("    push rbp\n");
-    generated_asm.push_str("    mov rbp,rdi\n");
-
-    // Save clobbered registers
-    if !options.contains(InlineAsmOptions::NORETURN) {
-        // FIXME skip registers saved by the calling convention
-        for &(reg, offset) in &clobbered_regs {
-            save_register(&mut generated_asm, arch, reg, offset);
+    registers: Vec<Option<InlineAsmReg>>,
+    stack_slots_clobber: Vec<Option<Size>>,
+    stack_slots_input: Vec<Option<Size>>,
+    stack_slots_output: Vec<Option<Size>>,
+    stack_slot_size: Size,
+}
+
+impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
+    fn allocate_registers(&mut self) {
+        let sess = self.tcx.sess;
+        let map = allocatable_registers(self.arch, &sess.target_features, &sess.target);
+        let mut allocated = FxHashMap::<_, (bool, bool)>::default();
+        let mut regs = vec![None; self.operands.len()];
+
+        // Add explicit registers to the allocated set.
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                InlineAsmOperand::In { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
+                    regs[i] = Some(reg);
+                    allocated.entry(reg).or_default().0 = true;
+                }
+                InlineAsmOperand::Out {
+                    reg: InlineAsmRegOrRegClass::Reg(reg), late: true, ..
+                } => {
+                    regs[i] = Some(reg);
+                    allocated.entry(reg).or_default().1 = true;
+                }
+                InlineAsmOperand::Out { reg: InlineAsmRegOrRegClass::Reg(reg), .. }
+                | InlineAsmOperand::InOut { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
+                    regs[i] = Some(reg);
+                    allocated.insert(reg, (true, true));
+                }
+                _ => (),
+            }
         }
-    }
 
-    // Write input registers
-    for &(reg, offset, _value) in inputs {
-        restore_register(&mut generated_asm, arch, reg, offset);
-    }
+        // Allocate out/inout/inlateout registers first because they are more constrained.
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                InlineAsmOperand::Out {
+                    reg: InlineAsmRegOrRegClass::RegClass(class),
+                    late: false,
+                    ..
+                }
+                | InlineAsmOperand::InOut {
+                    reg: InlineAsmRegOrRegClass::RegClass(class), ..
+                } => {
+                    let mut alloc_reg = None;
+                    for &reg in &map[&class] {
+                        let mut used = false;
+                        reg.overlapping_regs(|r| {
+                            if allocated.contains_key(&r) {
+                                used = true;
+                            }
+                        });
+
+                        if !used {
+                            alloc_reg = Some(reg);
+                            break;
+                        }
+                    }
+
+                    let reg = alloc_reg.expect("cannot allocate registers");
+                    regs[i] = Some(reg);
+                    allocated.insert(reg, (true, true));
+                }
+                _ => (),
+            }
+        }
+
+        // Allocate in/lateout.
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                InlineAsmOperand::In { reg: InlineAsmRegOrRegClass::RegClass(class), .. } => {
+                    let mut alloc_reg = None;
+                    for &reg in &map[&class] {
+                        let mut used = false;
+                        reg.overlapping_regs(|r| {
+                            if allocated.get(&r).copied().unwrap_or_default().0 {
+                                used = true;
+                            }
+                        });
+
+                        if !used {
+                            alloc_reg = Some(reg);
+                            break;
+                        }
+                    }
+
+                    let reg = alloc_reg.expect("cannot allocate registers");
+                    regs[i] = Some(reg);
+                    allocated.entry(reg).or_default().0 = true;
+                }
+                InlineAsmOperand::Out {
+                    reg: InlineAsmRegOrRegClass::RegClass(class),
+                    late: true,
+                    ..
+                } => {
+                    let mut alloc_reg = None;
+                    for &reg in &map[&class] {
+                        let mut used = false;
+                        reg.overlapping_regs(|r| {
+                            if allocated.get(&r).copied().unwrap_or_default().1 {
+                                used = true;
+                            }
+                        });
+
+                        if !used {
+                            alloc_reg = Some(reg);
+                            break;
+                        }
+                    }
+
+                    let reg = alloc_reg.expect("cannot allocate registers");
+                    regs[i] = Some(reg);
+                    allocated.entry(reg).or_default().1 = true;
+                }
+                _ => (),
+            }
+        }
 
-    if options.contains(InlineAsmOptions::ATT_SYNTAX) {
-        generated_asm.push_str(".att_syntax\n");
+        self.registers = regs;
     }
 
-    // The actual inline asm
-    for piece in template {
-        match piece {
-            InlineAsmTemplatePiece::String(s) => {
-                generated_asm.push_str(s);
+    fn allocate_stack_slots(&mut self) {
+        let mut slot_size = Size::from_bytes(0);
+        let mut slots_clobber = vec![None; self.operands.len()];
+        let mut slots_input = vec![None; self.operands.len()];
+        let mut slots_output = vec![None; self.operands.len()];
+
+        let new_slot_fn = |slot_size: &mut Size, reg_class: InlineAsmRegClass| {
+            let reg_size =
+                reg_class.supported_types(self.arch).iter().map(|(ty, _)| ty.size()).max().unwrap();
+            let align = rustc_target::abi::Align::from_bytes(reg_size.bytes()).unwrap();
+            let offset = slot_size.align_to(align);
+            *slot_size = offset + reg_size;
+            offset
+        };
+        let mut new_slot = |x| new_slot_fn(&mut slot_size, x);
+
+        // Allocate stack slots for saving clobbered registers
+        let abi_clobber = InlineAsmClobberAbi::parse(
+            self.arch,
+            &self.tcx.sess.target_features,
+            &self.tcx.sess.target,
+            sym::C,
+        )
+        .unwrap()
+        .clobbered_regs();
+        for (i, reg) in self.registers.iter().enumerate().filter_map(|(i, r)| r.map(|r| (i, r))) {
+            let mut need_save = true;
+            // If the register overlaps with a register clobbered by function call, then
+            // we don't need to save it.
+            for r in abi_clobber {
+                r.overlapping_regs(|r| {
+                    if r == reg {
+                        need_save = false;
+                    }
+                });
+
+                if !need_save {
+                    break;
+                }
+            }
+
+            if need_save {
+                slots_clobber[i] = Some(new_slot(reg.reg_class()));
             }
-            InlineAsmTemplatePiece::Placeholder { operand_idx: _, modifier: _, span: _ } => todo!(),
         }
+
+        // Allocate stack slots for inout
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                InlineAsmOperand::InOut { reg, out_place: Some(_), .. } => {
+                    let slot = new_slot(reg.reg_class());
+                    slots_input[i] = Some(slot);
+                    slots_output[i] = Some(slot);
+                }
+                _ => (),
+            }
+        }
+
+        let slot_size_before_input = slot_size;
+        let mut new_slot = |x| new_slot_fn(&mut slot_size, x);
+
+        // Allocate stack slots for input
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                InlineAsmOperand::In { reg, .. }
+                | InlineAsmOperand::InOut { reg, out_place: None, .. } => {
+                    slots_input[i] = Some(new_slot(reg.reg_class()));
+                }
+                _ => (),
+            }
+        }
+
+        // Reset slot size to before input so that input and output operands can overlap
+        // and save some memory.
+        let slot_size_after_input = slot_size;
+        slot_size = slot_size_before_input;
+        let mut new_slot = |x| new_slot_fn(&mut slot_size, x);
+
+        // Allocate stack slots for output
+        for (i, operand) in self.operands.iter().enumerate() {
+            match *operand {
+                InlineAsmOperand::Out { reg, place: Some(_), .. } => {
+                    slots_output[i] = Some(new_slot(reg.reg_class()));
+                }
+                _ => (),
+            }
+        }
+
+        slot_size = slot_size.max(slot_size_after_input);
+
+        self.stack_slots_clobber = slots_clobber;
+        self.stack_slots_input = slots_input;
+        self.stack_slots_output = slots_output;
+        self.stack_slot_size = slot_size;
     }
-    generated_asm.push('\n');
 
-    if options.contains(InlineAsmOptions::ATT_SYNTAX) {
-        generated_asm.push_str(".intel_syntax noprefix\n");
+    fn generate_asm_wrapper(&self, asm_name: &str) -> String {
+        let mut generated_asm = String::new();
+        writeln!(generated_asm, ".globl {}", asm_name).unwrap();
+        writeln!(generated_asm, ".type {},@function", asm_name).unwrap();
+        writeln!(generated_asm, ".section .text.{},\"ax\",@progbits", asm_name).unwrap();
+        writeln!(generated_asm, "{}:", asm_name).unwrap();
+
+        let is_x86 = matches!(self.arch, InlineAsmArch::X86 | InlineAsmArch::X86_64);
+
+        if is_x86 {
+            generated_asm.push_str(".intel_syntax noprefix\n");
+        }
+        Self::prologue(&mut generated_asm, self.arch);
+
+        // Save clobbered registers
+        if !self.options.contains(InlineAsmOptions::NORETURN) {
+            for (reg, slot) in self
+                .registers
+                .iter()
+                .zip(self.stack_slots_clobber.iter().copied())
+                .filter_map(|(r, s)| r.zip(s))
+            {
+                Self::save_register(&mut generated_asm, self.arch, reg, slot);
+            }
+        }
+
+        // Write input registers
+        for (reg, slot) in self
+            .registers
+            .iter()
+            .zip(self.stack_slots_input.iter().copied())
+            .filter_map(|(r, s)| r.zip(s))
+        {
+            Self::restore_register(&mut generated_asm, self.arch, reg, slot);
+        }
+
+        if is_x86 && self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
+            generated_asm.push_str(".att_syntax\n");
+        }
+
+        // The actual inline asm
+        for piece in self.template {
+            match piece {
+                InlineAsmTemplatePiece::String(s) => {
+                    generated_asm.push_str(s);
+                }
+                InlineAsmTemplatePiece::Placeholder { operand_idx, modifier, span: _ } => {
+                    if self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
+                        generated_asm.push('%');
+                    }
+                    self.registers[*operand_idx]
+                        .unwrap()
+                        .emit(&mut generated_asm, self.arch, *modifier)
+                        .unwrap();
+                }
+            }
+        }
+        generated_asm.push('\n');
+
+        if is_x86 && self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
+            generated_asm.push_str(".intel_syntax noprefix\n");
+        }
+
+        if !self.options.contains(InlineAsmOptions::NORETURN) {
+            // Read output registers
+            for (reg, slot) in self
+                .registers
+                .iter()
+                .zip(self.stack_slots_output.iter().copied())
+                .filter_map(|(r, s)| r.zip(s))
+            {
+                Self::save_register(&mut generated_asm, self.arch, reg, slot);
+            }
+
+            // Restore clobbered registers
+            for (reg, slot) in self
+                .registers
+                .iter()
+                .zip(self.stack_slots_clobber.iter().copied())
+                .filter_map(|(r, s)| r.zip(s))
+            {
+                Self::restore_register(&mut generated_asm, self.arch, reg, slot);
+            }
+
+            Self::epilogue(&mut generated_asm, self.arch);
+        } else {
+            Self::epilogue_noreturn(&mut generated_asm, self.arch);
+        }
+
+        if is_x86 {
+            generated_asm.push_str(".att_syntax\n");
+        }
+        writeln!(generated_asm, ".size {name}, .-{name}", name = asm_name).unwrap();
+        generated_asm.push_str(".text\n");
+        generated_asm.push_str("\n\n");
+
+        generated_asm
     }
 
-    if !options.contains(InlineAsmOptions::NORETURN) {
-        // Read output registers
-        for &(reg, offset, _place) in outputs {
-            save_register(&mut generated_asm, arch, reg, offset);
+    fn prologue(generated_asm: &mut String, arch: InlineAsmArch) {
+        match arch {
+            InlineAsmArch::X86 => {
+                generated_asm.push_str("    push ebp\n");
+                generated_asm.push_str("    mov ebp,[esp+8]\n");
+            }
+            InlineAsmArch::X86_64 => {
+                generated_asm.push_str("    push rbp\n");
+                generated_asm.push_str("    mov rbp,rdi\n");
+            }
+            InlineAsmArch::RiscV32 => {
+                generated_asm.push_str("    addi sp, sp, -8\n");
+                generated_asm.push_str("    sw ra, 4(sp)\n");
+                generated_asm.push_str("    sw s0, 0(sp)\n");
+                generated_asm.push_str("    mv s0, a0\n");
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    addi sp, sp, -16\n");
+                generated_asm.push_str("    sd ra, 8(sp)\n");
+                generated_asm.push_str("    sd s0, 0(sp)\n");
+                generated_asm.push_str("    mv s0, a0\n");
+            }
+            _ => unimplemented!("prologue for {:?}", arch),
         }
+    }
 
-        // Restore clobbered registers
-        for &(reg, offset) in clobbered_regs.iter().rev() {
-            restore_register(&mut generated_asm, arch, reg, offset);
+    fn epilogue(generated_asm: &mut String, arch: InlineAsmArch) {
+        match arch {
+            InlineAsmArch::X86 => {
+                generated_asm.push_str("    pop ebp\n");
+                generated_asm.push_str("    ret\n");
+            }
+            InlineAsmArch::X86_64 => {
+                generated_asm.push_str("    pop rbp\n");
+                generated_asm.push_str("    ret\n");
+            }
+            InlineAsmArch::RiscV32 => {
+                generated_asm.push_str("    lw s0, 0(sp)\n");
+                generated_asm.push_str("    lw ra, 4(sp)\n");
+                generated_asm.push_str("    addi sp, sp, 8\n");
+                generated_asm.push_str("    ret\n");
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    ld s0, 0(sp)\n");
+                generated_asm.push_str("    ld ra, 8(sp)\n");
+                generated_asm.push_str("    addi sp, sp, 16\n");
+                generated_asm.push_str("    ret\n");
+            }
+            _ => unimplemented!("epilogue for {:?}", arch),
         }
+    }
 
-        generated_asm.push_str("    pop rbp\n");
-        generated_asm.push_str("    ret\n");
-    } else {
-        generated_asm.push_str("    ud2\n");
+    fn epilogue_noreturn(generated_asm: &mut String, arch: InlineAsmArch) {
+        match arch {
+            InlineAsmArch::X86 | InlineAsmArch::X86_64 => {
+                generated_asm.push_str("    ud2\n");
+            }
+            InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    ebreak\n");
+            }
+            _ => unimplemented!("epilogue_noreturn for {:?}", arch),
+        }
     }
 
-    generated_asm.push_str(".att_syntax\n");
-    writeln!(generated_asm, ".size {name}, .-{name}", name = asm_name).unwrap();
-    generated_asm.push_str(".text\n");
-    generated_asm.push_str("\n\n");
+    fn save_register(
+        generated_asm: &mut String,
+        arch: InlineAsmArch,
+        reg: InlineAsmReg,
+        offset: Size,
+    ) {
+        match arch {
+            InlineAsmArch::X86 => {
+                write!(generated_asm, "    mov [ebp+0x{:x}], ", offset.bytes()).unwrap();
+                reg.emit(generated_asm, InlineAsmArch::X86, None).unwrap();
+                generated_asm.push('\n');
+            }
+            InlineAsmArch::X86_64 => {
+                write!(generated_asm, "    mov [rbp+0x{:x}], ", offset.bytes()).unwrap();
+                reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+                generated_asm.push('\n');
+            }
+            InlineAsmArch::RiscV32 => {
+                generated_asm.push_str("    sw ");
+                reg.emit(generated_asm, InlineAsmArch::RiscV32, None).unwrap();
+                writeln!(generated_asm, ", 0x{:x}(s0)", offset.bytes()).unwrap();
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    sd ");
+                reg.emit(generated_asm, InlineAsmArch::RiscV64, None).unwrap();
+                writeln!(generated_asm, ", 0x{:x}(s0)", offset.bytes()).unwrap();
+            }
+            _ => unimplemented!("save_register for {:?}", arch),
+        }
+    }
 
-    generated_asm
+    fn restore_register(
+        generated_asm: &mut String,
+        arch: InlineAsmArch,
+        reg: InlineAsmReg,
+        offset: Size,
+    ) {
+        match arch {
+            InlineAsmArch::X86 => {
+                generated_asm.push_str("    mov ");
+                reg.emit(generated_asm, InlineAsmArch::X86, None).unwrap();
+                writeln!(generated_asm, ", [ebp+0x{:x}]", offset.bytes()).unwrap();
+            }
+            InlineAsmArch::X86_64 => {
+                generated_asm.push_str("    mov ");
+                reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
+                writeln!(generated_asm, ", [rbp+0x{:x}]", offset.bytes()).unwrap();
+            }
+            InlineAsmArch::RiscV32 => {
+                generated_asm.push_str("    lw ");
+                reg.emit(generated_asm, InlineAsmArch::RiscV32, None).unwrap();
+                writeln!(generated_asm, ", 0x{:x}(s0)", offset.bytes()).unwrap();
+            }
+            InlineAsmArch::RiscV64 => {
+                generated_asm.push_str("    ld ");
+                reg.emit(generated_asm, InlineAsmArch::RiscV64, None).unwrap();
+                writeln!(generated_asm, ", 0x{:x}(s0)", offset.bytes()).unwrap();
+            }
+            _ => unimplemented!("restore_register for {:?}", arch),
+        }
+    }
 }
 
 fn call_inline_asm<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
     asm_name: &str,
     slot_size: Size,
-    inputs: Vec<(InlineAsmReg, Size, Value)>,
-    outputs: Vec<(InlineAsmReg, Size, CPlace<'tcx>)>,
+    inputs: Vec<(Size, Value)>,
+    outputs: Vec<(Size, CPlace<'tcx>)>,
 ) {
     let stack_slot = fx.bcx.func.create_stack_slot(StackSlotData {
         kind: StackSlotKind::ExplicitSlot,
-        offset: None,
         size: u32::try_from(slot_size.bytes()).unwrap(),
     });
     if fx.clif_comments.enabled() {
@@ -287,50 +650,16 @@ fn call_inline_asm<'tcx>(
         fx.add_comment(inline_asm_func, asm_name);
     }
 
-    for (_reg, offset, value) in inputs {
+    for (offset, value) in inputs {
         fx.bcx.ins().stack_store(value, stack_slot, i32::try_from(offset.bytes()).unwrap());
     }
 
     let stack_slot_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
     fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
 
-    for (_reg, offset, place) in outputs {
+    for (offset, place) in outputs {
         let ty = fx.clif_type(place.layout().ty).unwrap();
         let value = fx.bcx.ins().stack_load(ty, stack_slot, i32::try_from(offset.bytes()).unwrap());
         place.write_cvalue(fx, CValue::by_val(value, place.layout()));
     }
 }
-
-fn expect_reg(reg_or_class: InlineAsmRegOrRegClass) -> InlineAsmReg {
-    match reg_or_class {
-        InlineAsmRegOrRegClass::Reg(reg) => reg,
-        InlineAsmRegOrRegClass::RegClass(class) => unimplemented!("{:?}", class),
-    }
-}
-
-fn save_register(generated_asm: &mut String, arch: InlineAsmArch, reg: InlineAsmReg, offset: Size) {
-    match arch {
-        InlineAsmArch::X86_64 => {
-            write!(generated_asm, "    mov [rbp+0x{:x}], ", offset.bytes()).unwrap();
-            reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
-            generated_asm.push('\n');
-        }
-        _ => unimplemented!("save_register for {:?}", arch),
-    }
-}
-
-fn restore_register(
-    generated_asm: &mut String,
-    arch: InlineAsmArch,
-    reg: InlineAsmReg,
-    offset: Size,
-) {
-    match arch {
-        InlineAsmArch::X86_64 => {
-            generated_asm.push_str("    mov ");
-            reg.emit(generated_asm, InlineAsmArch::X86_64, None).unwrap();
-            writeln!(generated_asm, ", [rbp+0x{:x}]", offset.bytes()).unwrap();
-        }
-        _ => unimplemented!("restore_register for {:?}", arch),
-    }
-}
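
Note on the inline_asm.rs rewrite above: the lowering reduces to one calling contract. `call_inline_asm` stores every input value at a fixed offset in a single stack slot, passes the slot's address to the generated wrapper (which saves clobbers, loads inputs into registers, runs the asm, then writes outputs back into the slot), and finally loads each output from its offset. The following is a minimal Rust sketch of that contract only; `call_via_slot` and the use of `u64` for all values are hypothetical simplifications, not code from the patch.

    use std::mem::size_of;

    // The generated wrapper behaves roughly like an `extern "C"` function taking a
    // single pointer to the shared stack slot.
    type AsmWrapper = unsafe extern "C" fn(*mut u8);

    unsafe fn call_via_slot(
        wrapper: AsmWrapper,
        slot: &mut [u8],              // backing storage, `stack_slot_size` bytes
        inputs: &[(usize, u64)],      // (offset, value), as in `stack_slots_input`
        outputs: &mut [(usize, u64)], // (offset, result), as in `stack_slots_output`
    ) {
        // Store inputs at their offsets (mirrors the `stack_store` loop above).
        for &(offset, value) in inputs {
            slot[offset..offset + size_of::<u64>()].copy_from_slice(&value.to_ne_bytes());
        }
        // Run the wrapper; it is responsible for register save/restore.
        wrapper(slot.as_mut_ptr());
        // Load outputs from their offsets (mirrors the `stack_load` loop above).
        for (offset, out) in outputs.iter_mut() {
            let mut bytes = [0u8; size_of::<u64>()];
            bytes.copy_from_slice(&slot[*offset..*offset + size_of::<u64>()]);
            *out = u64::from_ne_bytes(bytes);
        }
    }
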
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index 313b62c5770..55c9b4d9ba1 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -90,7 +90,7 @@ macro call_intrinsic_match {
         match $intrinsic {
             $(
                 sym::$name => {
-                    assert!($substs.is_noop());
+                    assert!($substs.is_empty());
                     if let [$(ref $arg),*] = *$args {
                         let ($($arg,)*) = (
                             $(codegen_operand($fx, $arg),)*
@@ -503,10 +503,10 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
 
             if intrinsic == sym::copy_nonoverlapping {
                 // FIXME emit_small_memcpy
-                fx.bcx.call_memcpy(fx.module.target_config(), dst, src, byte_amount);
+                fx.bcx.call_memcpy(fx.target_config, dst, src, byte_amount);
             } else {
                 // FIXME emit_small_memmove
-                fx.bcx.call_memmove(fx.module.target_config(), dst, src, byte_amount);
+                fx.bcx.call_memmove(fx.target_config, dst, src, byte_amount);
             }
         };
         // NOTE: the volatile variants have src and dst swapped
@@ -522,10 +522,10 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             // FIXME make the copy actually volatile when using emit_small_mem{cpy,move}
             if intrinsic == sym::volatile_copy_nonoverlapping_memory {
                 // FIXME emit_small_memcpy
-                fx.bcx.call_memcpy(fx.module.target_config(), dst, src, byte_amount);
+                fx.bcx.call_memcpy(fx.target_config, dst, src, byte_amount);
             } else {
                 // FIXME emit_small_memmove
-                fx.bcx.call_memmove(fx.module.target_config(), dst, src, byte_amount);
+                fx.bcx.call_memmove(fx.target_config, dst, src, byte_amount);
             }
         };
         size_of_val, <T> (c ptr) {
@@ -673,7 +673,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             let dst_ptr = dst.load_scalar(fx);
             // FIXME make the memset actually volatile when switching to emit_small_memset
             // FIXME use emit_small_memset
-            fx.bcx.call_memset(fx.module.target_config(), dst_ptr, val, count);
+            fx.bcx.call_memset(fx.target_config, dst_ptr, val, count);
         };
         ctlz | ctlz_nonzero, <T> (v arg) {
             // FIXME trap on `ctlz_nonzero` with zero arg.
@@ -1067,7 +1067,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         kw.Try, (v f, v data, v _catch_fn) {
             // FIXME once unwinding is supported, change this to actually catch panics
             let f_sig = fx.bcx.func.import_signature(Signature {
-                call_conv: CallConv::triple_default(fx.triple()),
+                call_conv: fx.target_config.default_call_conv,
                 params: vec![AbiParam::new(fx.bcx.func.dfg.value_type(data))],
                 returns: vec![],
             });
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index 43e68b4afa9..6c0631d9ecb 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -67,7 +67,34 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         _ if intrinsic.as_str().starts_with("simd_shuffle"), (c x, c y, o idx) {
             validate_simd_type!(fx, intrinsic, span, x.layout().ty);
 
-            let n: u16 = intrinsic.as_str()["simd_shuffle".len()..].parse().unwrap();
+            // For the unsuffixed `simd_shuffle` form, take the lane count from the length of
+            // the index array; for the older `simd_shuffleN` form, simply parse the suffix.
+            let n: u16 = if intrinsic == sym::simd_shuffle {
+                // Make sure this is actually an array, since typeck only checks the length-suffixed
+                // version of this intrinsic.
+                let idx_ty = fx.monomorphize(idx.ty(fx.mir, fx.tcx));
+                match idx_ty.kind() {
+                    ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => {
+                        len.try_eval_usize(fx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else(|| {
+                            span_bug!(span, "could not evaluate shuffle index array length")
+                        }).try_into().unwrap()
+                    }
+                    _ => {
+                        fx.tcx.sess.span_err(
+                            span,
+                            &format!(
+                                "simd_shuffle index must be an array of `u32`, got `{}`",
+                                idx_ty,
+                            ),
+                        );
+                        // Prevent verifier error
+                        crate::trap::trap_unreachable(fx, "compilation should not have succeeded");
+                        return;
+                    }
+                }
+            } else {
+                intrinsic.as_str()["simd_shuffle".len()..].parse().unwrap()
+            };
 
             assert_eq!(x.layout(), y.layout());
             let layout = x.layout();
@@ -378,27 +405,27 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         simd_reduce_min, (c v) {
-            // FIXME support floats
             validate_simd_type!(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
-                let lt = fx.bcx.ins().icmp(if layout.ty.is_signed() {
-                    IntCC::SignedLessThan
-                } else {
-                    IntCC::UnsignedLessThan
-                }, a, b);
+                let lt = match layout.ty.kind() {
+                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
+                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
+                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::LessThan, a, b),
+                    _ => unreachable!(),
+                };
                 fx.bcx.ins().select(lt, a, b)
             });
         };
 
         simd_reduce_max, (c v) {
-            // FIXME support floats
             validate_simd_type!(fx, intrinsic, span, v.layout().ty);
             simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
-                let gt = fx.bcx.ins().icmp(if layout.ty.is_signed() {
-                    IntCC::SignedGreaterThan
-                } else {
-                    IntCC::UnsignedGreaterThan
-                }, a, b);
+                let gt = match layout.ty.kind() {
+                    ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
+                    ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),
+                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::GreaterThan, a, b),
+                    _ => unreachable!(),
+                };
                 fx.bcx.ins().select(gt, a, b)
             });
         };
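
For the unsuffixed `simd_shuffle` handling above, the lane count comes from the length of the `[u32; N]` index array. As a rough scalar model of the intrinsic's semantics (the `shuffle_model` helper below is hypothetical and not the actual lowering): an index below the lane count of `x` selects from `x`, the rest select from `y`.

    // Scalar model of a two-input shuffle over N lanes; the real intrinsic operates
    // on SIMD vectors and requires the indices to be compile-time constants.
    fn shuffle_model<const N: usize>(x: &[u32], y: &[u32], idx: [u32; N]) -> [u32; N] {
        assert_eq!(x.len(), y.len());
        let mut out = [0u32; N];
        for (lane, &i) in out.iter_mut().zip(idx.iter()) {
            let i = i as usize;
            *lane = if i < x.len() { x[i] } else { y[i - x.len()] };
        }
        out
    }

    fn main() {
        // Interleave the low halves of two 4-lane vectors.
        let r = shuffle_model([1, 2, 3, 4].as_slice(), [5, 6, 7, 8].as_slice(), [0, 4, 1, 5]);
        assert_eq!(r, [1, 5, 2, 6]);
    }
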
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
index beb97edf09e..cb18f42f741 100644
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -4,7 +4,6 @@
 #![warn(unused_lifetimes)]
 #![warn(unreachable_pub)]
 
-extern crate snap;
 #[macro_use]
 extern crate rustc_middle;
 extern crate rustc_ast;
@@ -26,6 +25,7 @@ extern crate rustc_target;
 extern crate rustc_driver;
 
 use std::any::Any;
+use std::cell::Cell;
 
 use rustc_codegen_ssa::traits::CodegenBackend;
 use rustc_codegen_ssa::CodegenResults;
@@ -34,6 +34,7 @@ use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
 use rustc_session::config::OutputFilenames;
 use rustc_session::Session;
+use rustc_span::Symbol;
 
 use cranelift_codegen::isa::TargetIsa;
 use cranelift_codegen::settings::{self, Configurable};
@@ -59,7 +60,6 @@ mod inline_asm;
 mod intrinsics;
 mod linkage;
 mod main_shim;
-mod metadata;
 mod num;
 mod optimize;
 mod pointer;
@@ -71,9 +71,7 @@ mod value_and_place;
 mod vtable;
 
 mod prelude {
-    pub(crate) use std::convert::{TryFrom, TryInto};
-
-    pub(crate) use rustc_span::{Span, FileNameDisplayPreference};
+    pub(crate) use rustc_span::{FileNameDisplayPreference, Span};
 
     pub(crate) use rustc_hir::def_id::{DefId, LOCAL_CRATE};
     pub(crate) use rustc_middle::bug;
@@ -125,9 +123,11 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> {
 struct CodegenCx<'tcx> {
     tcx: TyCtxt<'tcx>,
     global_asm: String,
+    inline_asm_index: Cell<usize>,
     cached_context: Context,
     debug_context: Option<DebugContext<'tcx>>,
     unwind_context: UnwindContext,
+    cgu_name: Symbol,
 }
 
 impl<'tcx> CodegenCx<'tcx> {
@@ -136,18 +136,21 @@ impl<'tcx> CodegenCx<'tcx> {
         backend_config: BackendConfig,
         isa: &dyn TargetIsa,
         debug_info: bool,
+        cgu_name: Symbol,
     ) -> Self {
         assert_eq!(pointer_ty(tcx), isa.pointer_type());
 
         let unwind_context =
-            UnwindContext::new(tcx, isa, matches!(backend_config.codegen_mode, CodegenMode::Aot));
+            UnwindContext::new(isa, matches!(backend_config.codegen_mode, CodegenMode::Aot));
         let debug_context = if debug_info { Some(DebugContext::new(tcx, isa)) } else { None };
         CodegenCx {
             tcx,
             global_asm: String::new(),
+            inline_asm_index: Cell::new(0),
             cached_context: Context::new(),
             debug_context,
             unwind_context,
+            cgu_name,
         }
     }
 }
@@ -205,6 +208,7 @@ impl CodegenBackend for CraneliftCodegenBackend {
         &self,
         ongoing_codegen: Box<dyn Any>,
         _sess: &Session,
+        _outputs: &OutputFilenames,
     ) -> Result<(CodegenResults, FxHashMap<WorkProductId, WorkProduct>), ErrorReported> {
         Ok(*ongoing_codegen
             .downcast::<(CodegenResults, FxHashMap<WorkProductId, WorkProduct>)>()
@@ -268,19 +272,16 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Box<dyn isa::Tar
 
     let flags = settings::Flags::new(flags_builder);
 
-    let variant = cranelift_codegen::isa::BackendVariant::MachInst;
-
     let isa_builder = match sess.opts.cg.target_cpu.as_deref() {
         Some("native") => {
-            let builder = cranelift_native::builder_with_options(variant, true).unwrap();
+            let builder = cranelift_native::builder_with_options(true).unwrap();
             builder
         }
         Some(value) => {
             let mut builder =
-                cranelift_codegen::isa::lookup_variant(target_triple.clone(), variant)
-                    .unwrap_or_else(|err| {
-                        sess.fatal(&format!("can't compile for {}: {}", target_triple, err));
-                    });
+                cranelift_codegen::isa::lookup(target_triple.clone()).unwrap_or_else(|err| {
+                    sess.fatal(&format!("can't compile for {}: {}", target_triple, err));
+                });
             if let Err(_) = builder.enable(value) {
                 sess.fatal("the specified target cpu isn't currently supported by Cranelift.");
             }
@@ -288,10 +289,9 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Box<dyn isa::Tar
         }
         None => {
             let mut builder =
-                cranelift_codegen::isa::lookup_variant(target_triple.clone(), variant)
-                    .unwrap_or_else(|err| {
-                        sess.fatal(&format!("can't compile for {}: {}", target_triple, err));
-                    });
+                cranelift_codegen::isa::lookup(target_triple.clone()).unwrap_or_else(|err| {
+                    sess.fatal(&format!("can't compile for {}: {}", target_triple, err));
+                });
             if target_triple.architecture == target_lexicon::Architecture::X86_64 {
                 // Don't use "haswell" as the default, as it implies `has_lzcnt`.
                 // macOS CI is still at Ivy Bridge EP, so `lzcnt` is interpreted as `bsr`.
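
The `build_isa` changes above drop the explicit `BackendVariant` argument now that only the new-style backend exists, so ISA builders are looked up directly. A sketch of the host-ISA path under the cranelift API assumed here (the `host_isa` helper is hypothetical, and `Builder::finish` has changed signature across cranelift releases, so treat the exact return type as an assumption):

    use cranelift_codegen::isa::TargetIsa;
    use cranelift_codegen::settings::{self, Configurable};

    fn host_isa() -> Box<dyn TargetIsa> {
        let mut flags_builder = settings::builder();
        flags_builder.set("opt_level", "none").unwrap();
        let flags = settings::Flags::new(flags_builder);

        // `true` asks cranelift-native to also infer native flag settings from the
        // host CPU, replacing the old per-variant lookup.
        let isa_builder = cranelift_native::builder_with_options(true)
            .expect("host architecture not supported by Cranelift");
        isa_builder.finish(flags)
    }
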
diff --git a/compiler/rustc_codegen_cranelift/src/metadata.rs b/compiler/rustc_codegen_cranelift/src/metadata.rs
deleted file mode 100644
index 1c8fd0b01d9..00000000000
--- a/compiler/rustc_codegen_cranelift/src/metadata.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-//! Writing of the rustc metadata for dylibs
-
-use object::write::{Object, StandardSegment, Symbol, SymbolSection};
-use object::{SectionKind, SymbolFlags, SymbolKind, SymbolScope};
-
-use rustc_metadata::EncodedMetadata;
-use rustc_middle::ty::TyCtxt;
-
-// Adapted from https://github.com/rust-lang/rust/blob/da573206f87b5510de4b0ee1a9c044127e409bd3/src/librustc_codegen_llvm/base.rs#L47-L112
-pub(crate) fn new_metadata_object(
-    tcx: TyCtxt<'_>,
-    cgu_name: &str,
-    metadata: &EncodedMetadata,
-) -> Vec<u8> {
-    use snap::write::FrameEncoder;
-    use std::io::Write;
-
-    let mut compressed = rustc_metadata::METADATA_HEADER.to_vec();
-    FrameEncoder::new(&mut compressed).write_all(metadata.raw_data()).unwrap();
-
-    let triple = crate::target_triple(tcx.sess);
-
-    let binary_format = match triple.binary_format {
-        target_lexicon::BinaryFormat::Elf => object::BinaryFormat::Elf,
-        target_lexicon::BinaryFormat::Coff => object::BinaryFormat::Coff,
-        target_lexicon::BinaryFormat::Macho => object::BinaryFormat::MachO,
-        binary_format => tcx.sess.fatal(&format!("binary format {} is unsupported", binary_format)),
-    };
-    let architecture = match triple.architecture {
-        target_lexicon::Architecture::Aarch64(_) => object::Architecture::Aarch64,
-        target_lexicon::Architecture::Arm(_) => object::Architecture::Arm,
-        target_lexicon::Architecture::Avr => object::Architecture::Avr,
-        target_lexicon::Architecture::Hexagon => object::Architecture::Hexagon,
-        target_lexicon::Architecture::Mips32(_) => object::Architecture::Mips,
-        target_lexicon::Architecture::Mips64(_) => object::Architecture::Mips64,
-        target_lexicon::Architecture::Msp430 => object::Architecture::Msp430,
-        target_lexicon::Architecture::Powerpc => object::Architecture::PowerPc,
-        target_lexicon::Architecture::Powerpc64 => object::Architecture::PowerPc64,
-        target_lexicon::Architecture::Powerpc64le => todo!(),
-        target_lexicon::Architecture::Riscv32(_) => object::Architecture::Riscv32,
-        target_lexicon::Architecture::Riscv64(_) => object::Architecture::Riscv64,
-        target_lexicon::Architecture::S390x => object::Architecture::S390x,
-        target_lexicon::Architecture::Sparc64 => object::Architecture::Sparc64,
-        target_lexicon::Architecture::Sparcv9 => object::Architecture::Sparc64,
-        target_lexicon::Architecture::X86_32(_) => object::Architecture::I386,
-        target_lexicon::Architecture::X86_64 => object::Architecture::X86_64,
-        architecture => {
-            tcx.sess.fatal(&format!("target architecture {:?} is unsupported", architecture,))
-        }
-    };
-    let endian = match triple.endianness().unwrap() {
-        target_lexicon::Endianness::Little => object::Endianness::Little,
-        target_lexicon::Endianness::Big => object::Endianness::Big,
-    };
-
-    let mut object = Object::new(binary_format, architecture, endian);
-    object.add_file_symbol(cgu_name.as_bytes().to_vec());
-
-    let segment = object.segment_name(StandardSegment::Data).to_vec();
-    let section_id = object.add_section(segment, b".rustc".to_vec(), SectionKind::Data);
-    let offset = object.append_section_data(section_id, &compressed, 1);
-    // For MachO and probably PE this is necessary to prevent the linker from throwing away the
-    // .rustc section. For ELF this isn't necessary, but it also doesn't harm.
-    object.add_symbol(Symbol {
-        name: rustc_middle::middle::exported_symbols::metadata_symbol_name(tcx).into_bytes(),
-        value: offset,
-        size: compressed.len() as u64,
-        kind: SymbolKind::Data,
-        scope: SymbolScope::Dynamic,
-        weak: false,
-        section: SymbolSection::Section(section_id),
-        flags: SymbolFlags::None,
-    });
-
-    object.write().unwrap()
-}
diff --git a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
index ec846d71960..4dffb89e105 100644
--- a/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
+++ b/compiler/rustc_codegen_cranelift/src/pretty_clif.rs
@@ -57,7 +57,7 @@ use std::io::Write;
 
 use cranelift_codegen::{
     entity::SecondaryMap,
-    ir::{entities::AnyEntity, function::DisplayFunctionAnnotations},
+    ir::entities::AnyEntity,
     write::{FuncWriter, PlainWriter},
 };
 
@@ -129,7 +129,6 @@ impl FuncWriter for &'_ CommentWriter {
         &mut self,
         w: &mut dyn fmt::Write,
         func: &Function,
-        reg_info: Option<&isa::RegInfo>,
     ) -> Result<bool, fmt::Error> {
         for comment in &self.global_comments {
             if !comment.is_empty() {
@@ -142,7 +141,7 @@ impl FuncWriter for &'_ CommentWriter {
             writeln!(w)?;
         }
 
-        self.super_preamble(w, func, reg_info)
+        self.super_preamble(w, func)
     }
 
     fn write_entity_definition(
@@ -165,11 +164,10 @@ impl FuncWriter for &'_ CommentWriter {
         &mut self,
         w: &mut dyn fmt::Write,
         func: &Function,
-        isa: Option<&dyn isa::TargetIsa>,
         block: Block,
         indent: usize,
     ) -> fmt::Result {
-        PlainWriter.write_block_header(w, func, isa, block, indent)
+        PlainWriter.write_block_header(w, func, block, indent)
     }
 
     fn write_instruction(
@@ -177,11 +175,10 @@ impl FuncWriter for &'_ CommentWriter {
         w: &mut dyn fmt::Write,
         func: &Function,
         aliases: &SecondaryMap<Value, Vec<Value>>,
-        isa: Option<&dyn isa::TargetIsa>,
         inst: Inst,
         indent: usize,
     ) -> fmt::Result {
-        PlainWriter.write_instruction(w, func, aliases, isa, inst, indent)?;
+        PlainWriter.write_instruction(w, func, aliases, inst, indent)?;
         if let Some(comment) = self.entity_comments.get(&inst.into()) {
             writeln!(w, "; {}", comment.replace('\n', "\n; "))?;
         }
@@ -249,7 +246,6 @@ pub(crate) fn write_clif_file<'tcx>(
                 &mut clif_comments,
                 &mut clif,
                 &context.func,
-                &DisplayFunctionAnnotations { isa: Some(isa), value_ranges: None },
             )
             .unwrap();
 
@@ -278,7 +274,6 @@ impl fmt::Debug for FunctionCx<'_, '_, '_> {
             &mut &self.clif_comments,
             &mut clif,
             &self.bcx.func,
-            &DisplayFunctionAnnotations::default(),
         )
         .unwrap();
         writeln!(f, "\n{}", clif)
diff --git a/compiler/rustc_codegen_cranelift/src/trap.rs b/compiler/rustc_codegen_cranelift/src/trap.rs
index fe8d20fa39f..99b5366e349 100644
--- a/compiler/rustc_codegen_cranelift/src/trap.rs
+++ b/compiler/rustc_codegen_cranelift/src/trap.rs
@@ -9,7 +9,7 @@ fn codegen_print(fx: &mut FunctionCx<'_, '_, '_>, msg: &str) {
             "puts",
             Linkage::Import,
             &Signature {
-                call_conv: CallConv::triple_default(fx.triple()),
+                call_conv: fx.target_config.default_call_conv,
                 params: vec![AbiParam::new(fx.pointer_type)],
                 returns: vec![AbiParam::new(types::I32)],
             },
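
The replacement made here (and in abi/mod.rs and intrinsics/mod.rs above) is the same everywhere: instead of re-deriving the calling convention from the target triple at each call site, the cached `TargetFrontendConfig` already carries it. A minimal sketch of building such an import signature from that config (the `puts_signature` helper is hypothetical, not code from the patch):

    use cranelift_codegen::ir::{types, AbiParam, Signature};
    use cranelift_codegen::isa::TargetFrontendConfig;

    // Equivalent of the `puts` import signature above, built from the cached config.
    fn puts_signature(target_config: &TargetFrontendConfig) -> Signature {
        let mut sig = Signature::new(target_config.default_call_conv);
        sig.params.push(AbiParam::new(target_config.pointer_type()));
        sig.returns.push(AbiParam::new(types::I32));
        sig
    }
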
diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
index 30d5340935f..f29d13ccabd 100644
--- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs
+++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs
@@ -329,7 +329,6 @@ impl<'tcx> CPlace<'tcx> {
             // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
             // specify stack slot alignment.
             size: (u32::try_from(layout.size.bytes()).unwrap() + 15) / 16 * 16,
-            offset: None,
         });
         CPlace { inner: CPlaceInner::Addr(Pointer::stack_slot(stack_slot), None), layout }
     }
@@ -472,7 +471,6 @@ impl<'tcx> CPlace<'tcx> {
                         // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
                         // specify stack slot alignment.
                         size: (src_ty.bytes() + 15) / 16 * 16,
-                        offset: None,
                     });
                     let ptr = Pointer::stack_slot(stack_slot);
                     ptr.store(fx, data, MemFlags::trusted());
@@ -512,6 +510,26 @@ impl<'tcx> CPlace<'tcx> {
         let dst_layout = self.layout();
         let to_ptr = match self.inner {
             CPlaceInner::Var(_local, var) => {
+                if let ty::Array(element, len) = dst_layout.ty.kind() {
+                    // Can only happen for vector types
+                    let len =
+                        u16::try_from(len.eval_usize(fx.tcx, ParamEnv::reveal_all())).unwrap();
+                    let vector_ty = fx.clif_type(element).unwrap().by(len).unwrap();
+
+                    let data = match from.0 {
+                        CValueInner::ByRef(ptr, None) => {
+                            let mut flags = MemFlags::new();
+                            flags.set_notrap();
+                            ptr.load(fx, vector_ty, flags)
+                        }
+                        CValueInner::ByVal(_)
+                        | CValueInner::ByValPair(_, _)
+                        | CValueInner::ByRef(_, Some(_)) => bug!("array should be ByRef"),
+                    };
+
+                    fx.bcx.def_var(var, data);
+                    return;
+                }
                 let data = CValue(from.0, dst_layout).load_scalar(fx);
                 let dst_ty = fx.clif_type(self.layout().ty).unwrap();
                 transmute_value(fx, var, data, dst_ty);
@@ -583,7 +601,7 @@ impl<'tcx> CPlace<'tcx> {
                 let src_align = src_layout.align.abi.bytes() as u8;
                 let dst_align = dst_layout.align.abi.bytes() as u8;
                 fx.bcx.emit_small_memory_copy(
-                    fx.module.target_config(),
+                    fx.target_config,
                     to_addr,
                     from_addr,
                     size,
@@ -605,14 +623,39 @@ impl<'tcx> CPlace<'tcx> {
         let layout = self.layout();
 
         match self.inner {
-            CPlaceInner::Var(local, var) => {
-                if let Abi::Vector { .. } = layout.abi {
+            CPlaceInner::Var(local, var) => match layout.ty.kind() {
+                ty::Array(_, _) => {
+                    // Can only happen for vector types
                     return CPlace {
                         inner: CPlaceInner::VarLane(local, var, field.as_u32().try_into().unwrap()),
                         layout: layout.field(fx, field.as_u32().try_into().unwrap()),
                     };
                 }
-            }
+                ty::Adt(adt_def, substs) if layout.ty.is_simd() => {
+                    let f0_ty = adt_def.non_enum_variant().fields[0].ty(fx.tcx, substs);
+
+                    match f0_ty.kind() {
+                        ty::Array(_, _) => {
+                            assert_eq!(field.as_u32(), 0);
+                            return CPlace {
+                                inner: CPlaceInner::Var(local, var),
+                                layout: layout.field(fx, field.as_u32().try_into().unwrap()),
+                            };
+                        }
+                        _ => {
+                            return CPlace {
+                                inner: CPlaceInner::VarLane(
+                                    local,
+                                    var,
+                                    field.as_u32().try_into().unwrap(),
+                                ),
+                                layout: layout.field(fx, field.as_u32().try_into().unwrap()),
+                            };
+                        }
+                    }
+                }
+                _ => {}
+            },
             CPlaceInner::VarPair(local, var1, var2) => {
                 let layout = layout.field(&*fx, field.index());
 
@@ -629,7 +672,12 @@ impl<'tcx> CPlace<'tcx> {
 
         let (field_ptr, field_layout) = codegen_field(fx, base, extra, layout, field);
         if field_layout.is_unsized() {
-            CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout)
+            if let ty::Foreign(_) = field_layout.ty.kind() {
+                assert!(extra.is_none());
+                CPlace::for_ptr(field_ptr, field_layout)
+            } else {
+                CPlace::for_ptr_with_extra(field_ptr, extra.unwrap(), field_layout)
+            }
         } else {
             CPlace::for_ptr(field_ptr, field_layout)
         }
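
The new `ty::Foreign` branch in the unsized-field path above covers places whose type is an `extern type`: such a place is unsized yet carries no pointer metadata, which is why `extra` is asserted to be absent and a plain `CPlace::for_ptr` suffices. A nightly-only sketch of the property being relied on (illustrative, not taken from the patch):

    #![feature(extern_types)]
    use std::mem::size_of;

    extern "C" {
        type Opaque; // an `extern type`: unsized, but with no pointer metadata
    }

    fn main() {
        // Unlike `&[u8]` or `&dyn Trait`, a reference to an extern type is thin:
        // there is no length or vtable "extra" to carry alongside the address.
        assert_eq!(size_of::<&Opaque>(), size_of::<usize>());
        assert_eq!(size_of::<&[u8]>(), 2 * size_of::<usize>());
    }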