about summary refs log tree commit diff
path: root/compiler/rustc_codegen_ssa
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2024-03-23 13:57:55 +0000
committerbors <bors@rust-lang.org>2024-03-23 13:57:55 +0000
commitd6eb0f5a09247ff8443e68b4d17e0350c74bafb1 (patch)
treeaa13e444d15635e819516aa6b016ab83061d7022 /compiler/rustc_codegen_ssa
parentc3b05c6e5b5b59613350b8c2875b0add67ed74df (diff)
parent75d2e5b1236afc5f8211726ac24887f59f47f09b (diff)
downloadrust-d6eb0f5a09247ff8443e68b4d17e0350c74bafb1.tar.gz
rust-d6eb0f5a09247ff8443e68b4d17e0350c74bafb1.zip
Auto merge of #122582 - scottmcm:swap-intrinsic-v2, r=oli-obk
Let codegen decide when to `mem::swap` with immediates

Making `libcore` decide this is silly; the backend has so much better information about when it's a good idea.

Thus this PR introduces a new `typed_swap` intrinsic with a fallback body, and replaces that fallback implementation when swapping immediates or scalar pairs.

r? oli-obk

Replaces #111744, and means we'll never need more libs PRs like #111803 or #107140
Diffstat (limited to 'compiler/rustc_codegen_ssa')
-rw-r--r--compiler/rustc_codegen_ssa/src/mir/intrinsic.rs24
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/builder.rs54
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/type_.rs14
3 files changed, 90 insertions, 2 deletions
diff --git a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
index 5532ff6e6a5..3e6cf0ece29 100644
--- a/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
@@ -9,6 +9,7 @@ use crate::traits::*;
 use crate::MemFlags;
 
 use rustc_middle::ty::{self, Ty, TyCtxt};
+use rustc_session::config::OptLevel;
 use rustc_span::{sym, Span};
 use rustc_target::abi::{
     call::{FnAbi, PassMode},
@@ -75,6 +76,29 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         let name = bx.tcx().item_name(def_id);
         let name_str = name.as_str();
 
+        // If we're swapping something that's *not* an `OperandValue::Ref`,
+        // then we can do it directly and avoid the alloca.
+        // Otherwise, we'll let the fallback MIR body take care of it.
+        if let sym::typed_swap = name {
+            let pointee_ty = fn_args.type_at(0);
+            let pointee_layout = bx.layout_of(pointee_ty);
+            if !bx.is_backend_ref(pointee_layout)
+                // But if we're not going to optimize, trying to use the fallback
+                // body just makes things worse, so don't bother.
+                || bx.sess().opts.optimize == OptLevel::No
+                // NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
+                // reinterpretation of values as (chunkable) byte arrays, and the loop in the
+                // block optimization in `ptr::swap_nonoverlapping` is hard to rewrite back
+                // into the (unoptimized) direct swapping implementation, so we disable it.
+                || bx.sess().target.arch == "spirv"
+            {
+                let x_place = PlaceRef::new_sized(args[0].immediate(), pointee_layout);
+                let y_place = PlaceRef::new_sized(args[1].immediate(), pointee_layout);
+                bx.typed_place_swap(x_place, y_place);
+                return Ok(());
+            }
+        }
+
         let llret_ty = bx.backend_type(bx.layout_of(ret_ty));
         let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
 
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs
index 36f37e3791b..7bc9dee3a89 100644
--- a/compiler/rustc_codegen_ssa/src/traits/builder.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -1,22 +1,24 @@
 use super::abi::AbiBuilderMethods;
 use super::asm::AsmBuilderMethods;
+use super::consts::ConstMethods;
 use super::coverageinfo::CoverageInfoBuilderMethods;
 use super::debuginfo::DebugInfoBuilderMethods;
 use super::intrinsic::IntrinsicCallMethods;
 use super::misc::MiscMethods;
-use super::type_::{ArgAbiMethods, BaseTypeMethods};
+use super::type_::{ArgAbiMethods, BaseTypeMethods, LayoutTypeMethods};
 use super::{HasCodegen, StaticBuilderMethods};
 
 use crate::common::{
     AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind,
 };
-use crate::mir::operand::OperandRef;
+use crate::mir::operand::{OperandRef, OperandValue};
 use crate::mir::place::PlaceRef;
 use crate::MemFlags;
 
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{HasParamEnv, TyAndLayout};
 use rustc_middle::ty::Ty;
+use rustc_session::config::OptLevel;
 use rustc_span::Span;
 use rustc_target::abi::call::FnAbi;
 use rustc_target::abi::{Abi, Align, Scalar, Size, WrappingRange};
@@ -267,6 +269,54 @@ pub trait BuilderMethods<'a, 'tcx>:
         flags: MemFlags,
     );
 
+    /// *Typed* copy for non-overlapping places.
+    ///
+    /// Has a default implementation in terms of `memcpy`, but specific backends
+    /// can override to do something smarter if possible.
+    ///
+    /// (For example, typed load-stores with alias metadata.)
+    fn typed_place_copy(
+        &mut self,
+        dst: PlaceRef<'tcx, Self::Value>,
+        src: PlaceRef<'tcx, Self::Value>,
+    ) {
+        debug_assert!(src.llextra.is_none());
+        debug_assert!(dst.llextra.is_none());
+        debug_assert_eq!(dst.layout.size, src.layout.size);
+        if self.sess().opts.optimize == OptLevel::No && self.is_backend_immediate(dst.layout) {
+            // If we're not optimizing, the aliasing information from `memcpy`
+            // isn't useful, so just load-store the value for smaller code.
+            let temp = self.load_operand(src);
+            temp.val.store(self, dst);
+        } else if !dst.layout.is_zst() {
+            let bytes = self.const_usize(dst.layout.size.bytes());
+            self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
+        }
+    }
+
+    /// *Typed* swap for non-overlapping places.
+    ///
+    /// Avoids `alloca`s for Immediates and ScalarPairs.
+    ///
+    /// FIXME: Maybe do something smarter for Ref types too?
+    /// For now, the `typed_swap` intrinsic just doesn't call this for those
+    /// cases (in non-debug), preferring the fallback body instead.
+    fn typed_place_swap(
+        &mut self,
+        left: PlaceRef<'tcx, Self::Value>,
+        right: PlaceRef<'tcx, Self::Value>,
+    ) {
+        let mut temp = self.load_operand(left);
+        if let OperandValue::Ref(..) = temp.val {
+            // The SSA value isn't stand-alone, so we need to copy it elsewhere
+            let alloca = PlaceRef::alloca(self, left.layout);
+            self.typed_place_copy(alloca, left);
+            temp = self.load_operand(alloca);
+        }
+        self.typed_place_copy(left, right);
+        temp.val.store(self, right);
+    }
+
     fn select(
         &mut self,
         cond: Self::Value,
diff --git a/compiler/rustc_codegen_ssa/src/traits/type_.rs b/compiler/rustc_codegen_ssa/src/traits/type_.rs
index 72cce43a3fa..555833759eb 100644
--- a/compiler/rustc_codegen_ssa/src/traits/type_.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/type_.rs
@@ -120,6 +120,20 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
         immediate: bool,
     ) -> Self::Type;
 
+    /// A type that produces an [`OperandValue::Ref`] when loaded.
+    ///
+    /// AKA one that's not a ZST, not `is_backend_immediate`, and
+    /// not `is_backend_scalar_pair`. For such a type, a
+    /// [`load_operand`] doesn't actually `load` anything.
+    ///
+    /// [`OperandValue::Ref`]: crate::mir::operand::OperandValue::Ref
+    /// [`load_operand`]: super::BuilderMethods::load_operand
+    fn is_backend_ref(&self, layout: TyAndLayout<'tcx>) -> bool {
+        !(layout.is_zst()
+            || self.is_backend_immediate(layout)
+            || self.is_backend_scalar_pair(layout))
+    }
+
     /// A type that can be used in a [`super::BuilderMethods::load`] +
     /// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
     /// such as a MIR `*_0 = *_1`.