author     Scott McMurray <scottmcm@users.noreply.github.com>  2022-02-26 18:57:15 -0800
committer  Scott McMurray <scottmcm@users.noreply.github.com>  2022-02-26 18:57:15 -0800
commit     b582bd388f5693119bbefa85ed7ea055760f9eef (patch)
tree       c076ed8ed252576ece42ed7fa6a6e238fde5486b /src/test/codegen
parent     10cc7a6d031fd607f594f4c7af113bfaa9a879e9 (diff)
For MIRI, cfg out the swap logic from 94212
Diffstat (limited to 'src/test/codegen')
-rw-r--r--  src/test/codegen/swap-large-types.rs  27
1 file changed, 27 insertions, 0 deletions
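
The commit subject describes gating the specialized swap logic behind a cfg so that Miri only sees the simple fallback. As a rough, hedged sketch of that pattern (the function below is illustrative; it is not the actual core::mem::swap implementation, and the fast path is elided):

use core::ptr;

// Illustrative only: the shape of cfg-gating a specialized swap path away from Miri.
pub fn swap_sketch<T>(x: &mut T, y: &mut T) {
    #[cfg(not(miri))]
    {
        // A hypothetical specialized path (e.g. chunked untyped copies) would
        // live here and return early; under Miri this block is compiled out.
    }
    // Simple fallback that Miri interprets: read *x into a temporary, copy
    // *y over *x, then write the temporary into *y. No value is dropped or
    // duplicated; everything is moved exactly once.
    unsafe {
        let tmp = ptr::read(x);
        ptr::copy_nonoverlapping(y, x, 1);
        ptr::write(y, tmp);
    }
}
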
diff --git a/src/test/codegen/swap-large-types.rs b/src/test/codegen/swap-large-types.rs
index 535d301a3d2..91a1ab7144f 100644
--- a/src/test/codegen/swap-large-types.rs
+++ b/src/test/codegen/swap-large-types.rs
@@ -39,6 +39,9 @@ pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
     swap(x, y)
 }
 
+// Verify that types with usize alignment are swapped via vectorized usizes,
+// not by falling back to byte-level code.
+
 // CHECK-LABEL: @swap_slice
 #[no_mangle]
 pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
@@ -50,6 +53,8 @@ pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
     }
 }
 
+// But for a large align-1 type, vectorized byte copying is what we want.
+
 type OneKilobyteBuffer = [u8; 1024];
 
 // CHECK-LABEL: @swap_1kb_slices
@@ -62,3 +67,25 @@ pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer])
         x.swap_with_slice(y);
     }
 }
+
+// This verifies that the 2×read + 2×write optimizes to just 3 memcpys
+// for an unusual type like this.  It's not clear whether we should do anything
+// smarter in Rust for these, so for now it's fine to leave them to the backend.
+// That's not as bad as it might seem since, for example, LLVM will lower the
+// memcpys below to VMOVAPS on YMMs if the AVX target feature is enabled.
+// Eventually we'll be able to pass `align_of::<T>` to a const generic and
+// thus pick a smarter chunk size ourselves without huge code duplication.
+
+#[repr(align(64))]
+pub struct BigButHighlyAligned([u8; 64 * 3]);
+
+// CHECK-LABEL: @swap_big_aligned
+#[no_mangle]
+pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
+// CHECK-NOT: call void @llvm.memcpy
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 64 dereferenceable(192)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 64 dereferenceable(192)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 64 dereferenceable(192)
+// CHECK-NOT: call void @llvm.memcpy
+    swap(x, y)
+}
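
To make the "2×read + 2×write" wording above concrete, here is a hedged, self-contained illustration (not the test code, which simply calls swap) of what a literal two-reads-then-two-writes sequence looks like for this type; the point of the CHECK lines is that the backend collapses such a sequence into the three 192-byte memcpys matched above:

use core::ptr;

#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 64 * 3]);

// Two whole-value reads followed by two whole-value writes; LLVM is expected
// to fold this into three memcpys, one of them through a stack temporary.
pub fn swap_by_value(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
    unsafe {
        let a = ptr::read(x); // read *x
        let b = ptr::read(y); // read *y
        ptr::write(x, b);     // write *x
        ptr::write(y, a);     // write *y
    }
}

One way to eyeball the VMOVAPS claim locally is to build an optimized library with AVX enabled, e.g. rustc -O -C target-feature=+avx --emit=asm --crate-type=lib, and look for 32-byte vmovaps loads and stores where the memcpys were.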