Diffstat (limited to 'compiler/rustc_codegen_cranelift/src/optimize')
-rw-r--r--  compiler/rustc_codegen_cranelift/src/optimize/code_layout.rs |  34
-rw-r--r--  compiler/rustc_codegen_cranelift/src/optimize/mod.rs         |  30
-rw-r--r--  compiler/rustc_codegen_cranelift/src/optimize/peephole.rs    | 106
-rw-r--r--  compiler/rustc_codegen_cranelift/src/optimize/stack2reg.rs   | 481
4 files changed, 651 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/optimize/code_layout.rs b/compiler/rustc_codegen_cranelift/src/optimize/code_layout.rs
new file mode 100644
index 00000000000..ca9ff15ec10
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/optimize/code_layout.rs
@@ -0,0 +1,34 @@
+//! This optimization moves cold code to the end of the function.
+//!
+//! Some code is executed much less often than other code, for example panicking code or the
+//! landing pads for unwinding. By moving this cold code to the end of the function, the average
+//! number of jumps is reduced and code locality is improved.
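+//!
+//! An illustrative sketch (hypothetical block layout, not taken from a real function):
+//!
+//! ```text
+//! ; before                  ; after
+//! block0  ; entry, hot      block0  ; entry, hot
+//! block1  ; cold (panic)    block2  ; hot
+//! block2  ; hot             block1  ; cold (panic)
+//! ```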
+//!
+//! # Undefined behaviour
+//!
+//! This optimization doesn't assume anything that isn't already assumed by Cranelift itself.
+
+use crate::prelude::*;
+
+pub(super) fn optimize_function(ctx: &mut Context, cold_blocks: &EntitySet<Block>) {
+    // FIXME Move the block in place instead of remove and append once
+    // bytecodealliance/cranelift#1339 is implemented.
+
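+    // First remove all cold blocks and their instructions from the layout.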
+    let mut block_insts = FxHashMap::default();
+    for block in cold_blocks.keys().filter(|&block| cold_blocks.contains(block)) {
+        let insts = ctx.func.layout.block_insts(block).collect::<Vec<_>>();
+        for &inst in &insts {
+            ctx.func.layout.remove_inst(inst);
+        }
+        block_insts.insert(block, insts);
+        ctx.func.layout.remove_block(block);
+    }
+
+    // And then append them at the back again.
+    for block in cold_blocks.keys().filter(|&block| cold_blocks.contains(block)) {
+        ctx.func.layout.append_block(block);
+        for inst in block_insts.remove(&block).unwrap() {
+            ctx.func.layout.append_inst(inst, block);
+        }
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/optimize/mod.rs b/compiler/rustc_codegen_cranelift/src/optimize/mod.rs
new file mode 100644
index 00000000000..389f50e797e
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/optimize/mod.rs
@@ -0,0 +1,30 @@
+//! Various optimizations specific to cg_clif
+
+use crate::prelude::*;
+
+mod code_layout;
+pub(crate) mod peephole;
+mod stack2reg;
+
+pub(crate) fn optimize_function<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    instance: Instance<'tcx>,
+    ctx: &mut Context,
+    cold_blocks: &EntitySet<Block>,
+    clif_comments: &mut crate::pretty_clif::CommentWriter,
+) {
+    // The code_layout optimization is very cheap.
+    self::code_layout::optimize_function(ctx, cold_blocks);
+
+    if tcx.sess.opts.optimize == rustc_session::config::OptLevel::No {
+        return; // FIXME classify optimizations over opt levels
+    }
+
+    // FIXME(#1142) stack2reg miscompiles lewton
+    if false {
+        self::stack2reg::optimize_function(ctx, clif_comments);
+    }
+
+    crate::pretty_clif::write_clif_file(tcx, "stack2reg", None, instance, &ctx, &*clif_comments);
+    crate::base::verify_func(tcx, &*clif_comments, &ctx.func);
+}
diff --git a/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
new file mode 100644
index 00000000000..b95e2d72877
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/optimize/peephole.rs
@@ -0,0 +1,106 @@
+//! Peephole optimizations that can be performed while creating clif ir.
+
+use cranelift_codegen::ir::{
+    condcodes::IntCC, types, InstBuilder, InstructionData, Opcode, Value, ValueDef,
+};
+use cranelift_frontend::FunctionBuilder;
+
+/// If the given value was produced by a `bint` instruction, return its input, otherwise return
+/// the given value.
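+///
+/// For example (illustrative): given `v1 = bint.i8 v0`, passing `v1` returns `v0`.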
+pub(crate) fn maybe_unwrap_bint(bcx: &mut FunctionBuilder<'_>, arg: Value) -> Value {
+    if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
+        match bcx.func.dfg[arg_inst] {
+            InstructionData::Unary { opcode: Opcode::Bint, arg } => arg,
+            _ => arg,
+        }
+    } else {
+        arg
+    }
+}
+
+/// If the given value was produced by the lowering of `Rvalue::Not` return the input and true,
+/// otherwise return the given value and false.
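+///
+/// For example (illustrative): `v1 = icmp_imm eq v0, 0` is how `!v0` is lowered for `bool`s, so
+/// passing `v1` returns `(v0, true)`.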
+pub(crate) fn maybe_unwrap_bool_not(bcx: &mut FunctionBuilder<'_>, arg: Value) -> (Value, bool) {
+    if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
+        match bcx.func.dfg[arg_inst] {
+            // This is the lowering of `Rvalue::Not`
+            InstructionData::IntCompareImm {
+                opcode: Opcode::IcmpImm,
+                cond: IntCC::Equal,
+                arg,
+                imm,
+            } if imm.bits() == 0 => (arg, true),
+            _ => (arg, false),
+        }
+    } else {
+        (arg, false)
+    }
+}
+
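+/// Convert `arg` into a value that `brz`/`brnz` can branch on. This works around `brz.i8` and
+/// `brnz.i8` not being implemented by widening narrow integer values to `i32`, using a widening
+/// load when the value comes directly from a load.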
+pub(crate) fn make_branchable_value(bcx: &mut FunctionBuilder<'_>, arg: Value) -> Value {
+    if bcx.func.dfg.value_type(arg).is_bool() {
+        return arg;
+    }
+
+    (|| {
+        let arg_inst = if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
+            arg_inst
+        } else {
+            return None;
+        };
+
+        match bcx.func.dfg[arg_inst] {
+            // A load of a narrow integer that is used as the branch condition.
+            InstructionData::Load { opcode: Opcode::Load, arg: ptr, flags, offset } => {
+                // Using `load.i8 + uextend.i32` would legalize to `uload8 + ireduce.i8 +
+                // uextend.i32`. Just `uload8` is much faster.
+                match bcx.func.dfg.ctrl_typevar(arg_inst) {
+                    types::I8 => Some(bcx.ins().uload8(types::I32, flags, ptr, offset)),
+                    types::I16 => Some(bcx.ins().uload16(types::I32, flags, ptr, offset)),
+                    _ => None,
+                }
+            }
+            _ => None,
+        }
+    })()
+    .unwrap_or_else(|| {
+        match bcx.func.dfg.value_type(arg) {
+            types::I8 | types::I16 => {
+                // WORKAROUND for brz.i8 and brnz.i8 not yet being implemented
+                bcx.ins().uextend(types::I32, arg)
+            }
+            _ => arg,
+        }
+    })
+}
+
+/// Returns whether the branch is statically known to be taken, or `None` if it can't be
+/// determined statically.
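+///
+/// For example (illustrative): if `arg` is an `iconst 0` and `test_zero` is true (a `brz`), this
+/// returns `Some(true)`.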
+pub(crate) fn maybe_known_branch_taken(
+    bcx: &FunctionBuilder<'_>,
+    arg: Value,
+    test_zero: bool,
+) -> Option<bool> {
+    let arg_inst = if let ValueDef::Result(arg_inst, 0) = bcx.func.dfg.value_def(arg) {
+        arg_inst
+    } else {
+        return None;
+    };
+
+    match bcx.func.dfg[arg_inst] {
+        InstructionData::UnaryBool { opcode: Opcode::Bconst, imm } => {
+            if test_zero {
+                Some(!imm)
+            } else {
+                Some(imm)
+            }
+        }
+        InstructionData::UnaryImm { opcode: Opcode::Iconst, imm } => {
+            if test_zero {
+                Some(imm.bits() == 0)
+            } else {
+                Some(imm.bits() != 0)
+            }
+        }
+        _ => None,
+    }
+}
diff --git a/compiler/rustc_codegen_cranelift/src/optimize/stack2reg.rs b/compiler/rustc_codegen_cranelift/src/optimize/stack2reg.rs
new file mode 100644
index 00000000000..d111f37f5e4
--- /dev/null
+++ b/compiler/rustc_codegen_cranelift/src/optimize/stack2reg.rs
@@ -0,0 +1,481 @@
+//! This optimization replaces stack accesses with SSA variables and removes dead stores when possible.
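+//!
+//! An illustrative sketch (hypothetical CLIF) of the store to load forwarding and dead store
+//! removal performed by this pass:
+//!
+//! ```text
+//! ; before                     ; after
+//! stack_store v0, ss0          nop                  ; dead store removed
+//! v1 = stack_load.i64 ss0      nop                  ; v1 aliased to v0
+//! ```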
+//!
+//! # Undefined behaviour
+//!
+//! This optimization is based on the assumption that stack slots which don't have their address
+//! leaked through `stack_addr` are only accessed using `stack_load` and `stack_store` in the
+//! function which has the stack slots. This optimization also assumes that stack slot accesses
+//! are never out of bounds. If these assumptions do not hold, this optimization may incorrectly
+//! remove `stack_store` instructions, or incorrectly use a previously stored value as the value
+//! being loaded by a `stack_load`.
+
+use std::collections::BTreeMap;
+use std::fmt;
+use std::ops::Not;
+
+use rustc_data_structures::fx::FxHashSet;
+
+use cranelift_codegen::cursor::{Cursor, FuncCursor};
+use cranelift_codegen::ir::immediates::Offset32;
+use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
+
+use crate::prelude::*;
+
+/// Workaround for `StackSlot` not implementing `Ord`.
+#[derive(Copy, Clone, PartialEq, Eq)]
+struct OrdStackSlot(StackSlot);
+
+impl fmt::Debug for OrdStackSlot {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:?}", self.0)
+    }
+}
+
+impl PartialOrd for OrdStackSlot {
+    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
+        self.0.as_u32().partial_cmp(&rhs.0.as_u32())
+    }
+}
+
+impl Ord for OrdStackSlot {
+    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
+        self.0.as_u32().cmp(&rhs.0.as_u32())
+    }
+}
+
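+/// All recorded uses of a single stack slot, grouped by the kind of access.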
+#[derive(Debug, Default)]
+struct StackSlotUsage {
+    stack_addr: FxHashSet<Inst>,
+    stack_load: FxHashSet<Inst>,
+    stack_store: FxHashSet<Inst>,
+}
+
+impl StackSlotUsage {
+    fn potential_stores_for_load(&self, ctx: &Context, load: Inst) -> Vec<Inst> {
+        self.stack_store
+            .iter()
+            .cloned()
+            .filter(|&store| {
+                match spatial_overlap(&ctx.func, store, load) {
+                    SpatialOverlap::No => false, // Can never be the source of the loaded value.
+                    SpatialOverlap::Partial | SpatialOverlap::Full => true,
+                }
+            })
+            .filter(|&store| {
+                match temporal_order(ctx, store, load) {
+                    TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value.
+                    TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
+                }
+            })
+            .collect::<Vec<Inst>>()
+    }
+
+    fn potential_loads_of_store(&self, ctx: &Context, store: Inst) -> Vec<Inst> {
+        self.stack_load
+            .iter()
+            .cloned()
+            .filter(|&load| {
+                match spatial_overlap(&ctx.func, store, load) {
+                    SpatialOverlap::No => false, // Can never load the stored value.
+                    SpatialOverlap::Partial | SpatialOverlap::Full => true,
+                }
+            })
+            .filter(|&load| {
+                match temporal_order(ctx, store, load) {
+                    TemporalOrder::NeverBefore => false, // Can never observe the stored value.
+                    TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
+                }
+            })
+            .collect::<Vec<Inst>>()
+    }
+
+    fn remove_unused_stack_addr(func: &mut Function, inst: Inst) {
+        func.dfg.detach_results(inst);
+        func.dfg.replace(inst).nop();
+    }
+
+    fn remove_unused_load(func: &mut Function, load: Inst) {
+        func.dfg.detach_results(load);
+        func.dfg.replace(load).nop();
+    }
+
+    fn remove_dead_store(&mut self, func: &mut Function, store: Inst) {
+        func.dfg.replace(store).nop();
+        self.stack_store.remove(&store);
+    }
+
+    fn change_load_to_alias(&mut self, func: &mut Function, load: Inst, value: Value) {
+        let loaded_value = func.dfg.inst_results(load)[0];
+        let loaded_type = func.dfg.value_type(loaded_value);
+
+        if func.dfg.value_type(value) == loaded_type {
+            func.dfg.detach_results(load);
+            func.dfg.replace(load).nop();
+            func.dfg.change_to_alias(loaded_value, value);
+        } else {
+            func.dfg.replace(load).bitcast(loaded_type, value);
+        }
+
+        self.stack_load.remove(&load);
+    }
+}
+
+struct OptimizeContext<'a> {
+    ctx: &'a mut Context,
+    stack_slot_usage_map: BTreeMap<OrdStackSlot, StackSlotUsage>,
+}
+
+impl<'a> OptimizeContext<'a> {
+    fn for_context(ctx: &'a mut Context) -> Self {
+        ctx.flowgraph(); // Compute cfg and domtree.
+
+        // Record all stack_addr, stack_load and stack_store instructions.
+        let mut stack_slot_usage_map = BTreeMap::<OrdStackSlot, StackSlotUsage>::new();
+
+        let mut cursor = FuncCursor::new(&mut ctx.func);
+        while let Some(_block) = cursor.next_block() {
+            while let Some(inst) = cursor.next_inst() {
+                match cursor.func.dfg[inst] {
+                    InstructionData::StackLoad {
+                        opcode: Opcode::StackAddr,
+                        stack_slot,
+                        offset: _,
+                    } => {
+                        stack_slot_usage_map
+                            .entry(OrdStackSlot(stack_slot))
+                            .or_insert_with(StackSlotUsage::default)
+                            .stack_addr
+                            .insert(inst);
+                    }
+                    InstructionData::StackLoad {
+                        opcode: Opcode::StackLoad,
+                        stack_slot,
+                        offset: _,
+                    } => {
+                        stack_slot_usage_map
+                            .entry(OrdStackSlot(stack_slot))
+                            .or_insert_with(StackSlotUsage::default)
+                            .stack_load
+                            .insert(inst);
+                    }
+                    InstructionData::StackStore {
+                        opcode: Opcode::StackStore,
+                        arg: _,
+                        stack_slot,
+                        offset: _,
+                    } => {
+                        stack_slot_usage_map
+                            .entry(OrdStackSlot(stack_slot))
+                            .or_insert_with(StackSlotUsage::default)
+                            .stack_store
+                            .insert(inst);
+                    }
+                    _ => {}
+                }
+            }
+        }
+
+        OptimizeContext { ctx, stack_slot_usage_map }
+    }
+}
+
+pub(super) fn optimize_function(
+    ctx: &mut Context,
+    #[cfg_attr(not(debug_assertions), allow(unused_variables))]
+    clif_comments: &mut crate::pretty_clif::CommentWriter,
+) {
+    combine_stack_addr_with_load_store(&mut ctx.func);
+
+    let mut opt_ctx = OptimizeContext::for_context(ctx);
+
+    // FIXME Repeat following instructions until fixpoint.
+
+    remove_unused_stack_addr_and_stack_load(&mut opt_ctx);
+
+    #[cfg(debug_assertions)]
+    {
+        for (&OrdStackSlot(stack_slot), usage) in &opt_ctx.stack_slot_usage_map {
+            clif_comments.add_comment(stack_slot, format!("used by: {:?}", usage));
+        }
+    }
+
+    for (stack_slot, users) in opt_ctx.stack_slot_usage_map.iter_mut() {
+        if users.stack_addr.is_empty().not() {
+            // Stack addr leaked; there may be unknown loads and stores.
+            // FIXME use stacked borrows to optimize
+            continue;
+        }
+
+        for load in users.stack_load.clone().into_iter() {
+            let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load);
+
+            #[cfg(debug_assertions)]
+            for &store in &potential_stores {
+                clif_comments.add_comment(
+                    load,
+                    format!(
+                        "Potential store -> load forwarding {} -> {} ({:?}, {:?})",
+                        opt_ctx.ctx.func.dfg.display_inst(store, None),
+                        opt_ctx.ctx.func.dfg.display_inst(load, None),
+                        spatial_overlap(&opt_ctx.ctx.func, store, load),
+                        temporal_order(&opt_ctx.ctx, store, load),
+                    ),
+                );
+            }
+
+            match *potential_stores {
+                [] => {
+                    #[cfg(debug_assertions)]
+                    clif_comments
+                        .add_comment(load, "[BUG?] Reading uninitialized memory".to_string());
+                }
+                [store]
+                    if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full
+                        && temporal_order(&opt_ctx.ctx, store, load)
+                            == TemporalOrder::DefinitivelyBefore =>
+                {
+                    // Only one store could have been the origin of the value.
+                    let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0];
+
+                    #[cfg(debug_assertions)]
+                    clif_comments
+                        .add_comment(load, format!("Store to load forward {} -> {}", store, load));
+
+                    users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value);
+                }
+                _ => {} // FIXME implement this
+            }
+        }
+
+        for store in users.stack_store.clone().into_iter() {
+            let potential_loads = users.potential_loads_of_store(&opt_ctx.ctx, store);
+
+            #[cfg(debug_assertions)]
+            for &load in &potential_loads {
+                clif_comments.add_comment(
+                    store,
+                    format!(
+                        "Potential load from store {} <- {} ({:?}, {:?})",
+                        opt_ctx.ctx.func.dfg.display_inst(load, None),
+                        opt_ctx.ctx.func.dfg.display_inst(store, None),
+                        spatial_overlap(&opt_ctx.ctx.func, store, load),
+                        temporal_order(&opt_ctx.ctx, store, load),
+                    ),
+                );
+            }
+
+            if potential_loads.is_empty() {
+                // Never loaded; can safely remove all stores and the stack slot.
+                // FIXME also remove stores when there is always a next store before a load.
+
+                #[cfg(debug_assertions)]
+                clif_comments.add_comment(
+                    store,
+                    format!(
+                        "Remove dead stack store {} of {}",
+                        opt_ctx.ctx.func.dfg.display_inst(store, None),
+                        stack_slot.0
+                    ),
+                );
+
+                users.remove_dead_store(&mut opt_ctx.ctx.func, store);
+            }
+        }
+
+        if users.stack_store.is_empty() && users.stack_load.is_empty() {
+            opt_ctx.ctx.func.stack_slots[stack_slot.0].size = 0;
+        }
+    }
+}
+
+fn combine_stack_addr_with_load_store(func: &mut Function) {
+    // Turn load and store into stack_load and stack_store when possible.
+    let mut cursor = FuncCursor::new(func);
+    while let Some(_block) = cursor.next_block() {
+        while let Some(inst) = cursor.next_inst() {
+            match cursor.func.dfg[inst] {
+                InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => {
+                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128
+                        || cursor.func.dfg.ctrl_typevar(inst).is_vector()
+                    {
+                        continue; // WORKAROUND: stack_load.i128 not yet implemented
+                    }
+                    if let Some((stack_slot, stack_addr_offset)) =
+                        try_get_stack_slot_and_offset_for_addr(cursor.func, addr)
+                    {
+                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into())
+                        {
+                            let ty = cursor.func.dfg.ctrl_typevar(inst);
+                            cursor.func.dfg.replace(inst).stack_load(
+                                ty,
+                                stack_slot,
+                                combined_offset,
+                            );
+                        }
+                    }
+                }
+                InstructionData::Store {
+                    opcode: Opcode::Store,
+                    args: [value, addr],
+                    flags: _,
+                    offset,
+                } => {
+                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128
+                        || cursor.func.dfg.ctrl_typevar(inst).is_vector()
+                    {
+                        continue; // WORKAROUND: stack_store.i128 not yet implemented
+                    }
+                    if let Some((stack_slot, stack_addr_offset)) =
+                        try_get_stack_slot_and_offset_for_addr(cursor.func, addr)
+                    {
+                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into())
+                        {
+                            cursor.func.dfg.replace(inst).stack_store(
+                                value,
+                                stack_slot,
+                                combined_offset,
+                            );
+                        }
+                    }
+                }
+                _ => {}
+            }
+        }
+    }
+}
+
+fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext<'_>) {
+    // FIXME incrementally rebuild on each call?
+    let mut stack_addr_load_insts_users = FxHashMap::<Inst, FxHashSet<Inst>>::default();
+
+    let mut cursor = FuncCursor::new(&mut opt_ctx.ctx.func);
+    while let Some(_block) = cursor.next_block() {
+        while let Some(inst) = cursor.next_inst() {
+            for &arg in cursor.func.dfg.inst_args(inst) {
+                if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) {
+                    match cursor.func.dfg[arg_origin].opcode() {
+                        Opcode::StackAddr | Opcode::StackLoad => {
+                            stack_addr_load_insts_users
+                                .entry(arg_origin)
+                                .or_insert_with(FxHashSet::default)
+                                .insert(inst);
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        }
+    }
+
+    #[cfg(debug_assertions)]
+    for inst in stack_addr_load_insts_users.keys() {
+        let mut is_recorded_stack_addr_or_stack_load = false;
+        for stack_slot_users in opt_ctx.stack_slot_usage_map.values() {
+            is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst)
+                || stack_slot_users.stack_load.contains(inst);
+        }
+        assert!(is_recorded_stack_addr_or_stack_load);
+    }
+
+    // Replace all unused stack_addr and stack_load instructions with nop.
+    let mut func = &mut opt_ctx.ctx.func;
+
+    for stack_slot_users in opt_ctx.stack_slot_usage_map.values_mut() {
+        stack_slot_users
+            .stack_addr
+            .drain_filter(|inst| {
+                stack_addr_load_insts_users.get(inst).map(|users| users.is_empty()).unwrap_or(true)
+            })
+            .for_each(|inst| StackSlotUsage::remove_unused_stack_addr(&mut func, inst));
+
+        stack_slot_users
+            .stack_load
+            .drain_filter(|inst| {
+                stack_addr_load_insts_users.get(inst).map(|users| users.is_empty()).unwrap_or(true)
+            })
+            .for_each(|inst| StackSlotUsage::remove_unused_load(&mut func, inst));
+    }
+}
+
+fn try_get_stack_slot_and_offset_for_addr(
+    func: &Function,
+    addr: Value,
+) -> Option<(StackSlot, Offset32)> {
+    if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) {
+        if let InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset } =
+            func.dfg[addr_inst]
+        {
+            return Some((stack_slot, offset));
+        }
+    }
+    None
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum SpatialOverlap {
+    No,
+    Partial,
+    Full,
+}
+
+fn spatial_overlap(func: &Function, src: Inst, dest: Inst) -> SpatialOverlap {
+    fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) {
+        match func.dfg[inst] {
+            InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset }
+            | InstructionData::StackLoad { opcode: Opcode::StackLoad, stack_slot, offset }
+            | InstructionData::StackStore {
+                opcode: Opcode::StackStore,
+                stack_slot,
+                offset,
+                arg: _,
+            } => (stack_slot, offset, func.dfg.ctrl_typevar(inst).bytes()),
+            _ => unreachable!("{:?}", func.dfg[inst]),
+        }
+    }
+
+    debug_assert_ne!(src, dest);
+
+    let (src_ss, src_offset, src_size) = inst_info(func, src);
+    let (dest_ss, dest_offset, dest_size) = inst_info(func, dest);
+
+    if src_ss != dest_ss {
+        return SpatialOverlap::No;
+    }
+
+    if src_offset == dest_offset && src_size == dest_size {
+        return SpatialOverlap::Full;
+    }
+
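+    // Treat both accesses as half-open byte ranges `[offset, offset + size)` within the slot and
+    // check whether the ranges intersect. For example, a 4 byte store at offset 0 and an 8 byte
+    // load at offset 2 share bytes 2..4, so they partially overlap.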
+    let src_end: i64 = src_offset.try_add_i64(i64::from(src_size)).unwrap().into();
+    let dest_end: i64 = dest_offset.try_add_i64(i64::from(dest_size)).unwrap().into();
+    if src_end <= dest_offset.into() || dest_end <= src_offset.into() {
+        return SpatialOverlap::No;
+    }
+
+    SpatialOverlap::Partial
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum TemporalOrder {
+    /// `src` will never be executed before `dest`.
+    NeverBefore,
+
+    /// `src` may be executed before `dest`.
+    MaybeBefore,
+
+    /// `src` will always be executed before `dest`.
+    /// There may still be other instructions in between.
+    DefinitivelyBefore,
+}
+
+fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder {
+    debug_assert_ne!(src, dest);
+
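+    // Instruction-level dominance: if `src` dominates `dest`, every path from the function entry
+    // to `dest` executes `src` first.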
+    if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
+        TemporalOrder::DefinitivelyBefore
+    } else if ctx.domtree.dominates(dest, src, &ctx.func.layout) {
+        TemporalOrder::NeverBefore
+    } else {
+        TemporalOrder::MaybeBefore
+    }
+}