| author | bors <bors@rust-lang.org> | 2022-02-27 17:42:48 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2022-02-27 17:42:48 +0000 |
| commit | 6a705566166debf5eff88c57140df607fa409aaa (patch) | |
| tree | 943724a46461430c07e5df2b608911e7392c4c39 /src/test/codegen | |
| parent | 3b1fe7e7c95e14dd8a420edf2f8a160c70211e04 (diff) | |
| parent | b582bd388f5693119bbefa85ed7ea055760f9eef (diff) | |
Auto merge of #94412 - scottmcm:cfg-out-miri-from-swap, r=oli-obk
For MIRI, cfg out the swap vectorization logic from #94212.

Because of #69488, the swap logic from #94212 doesn't currently work in MIRI. Copying in smaller pieces is probably much worse for its performance anyway, so it'd probably rather just use the simple path regardless.

Part of #94371, though another PR will be needed for the CTFE aspect.

r? `@oli-obk` cc `@RalfJung`
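The library change itself sits outside this directory (the diffstat below is filtered to `src/test/codegen`), but the approach the message describes is the usual one of branching on the `miri` cfg so that Miri only ever exercises the simple path. A minimal sketch of that pattern, assuming hypothetical `swap_buffers`/`swap_simple`/`swap_chunked` helpers rather than the actual `core` code:

```rust
// A minimal sketch (not the actual library code) of the "cfg out for Miri"
// pattern: Miri and small types take the simple one-read-one-write path,
// everything else may take a hypothetical chunked/vectorized path.
use std::mem::size_of;
use std::ptr;

pub fn swap_buffers<T>(x: &mut T, y: &mut T) {
    if cfg!(miri) || size_of::<T>() < 32 {
        swap_simple(x, y);
    } else {
        swap_chunked(x, y);
    }
}

fn swap_simple<T>(x: &mut T, y: &mut T) {
    // SAFETY: `&mut` guarantees both references are valid and non-overlapping.
    unsafe {
        let tmp = ptr::read(x);
        ptr::copy_nonoverlapping::<T>(y, x, 1);
        ptr::write(y, tmp);
    }
}

fn swap_chunked<T>(x: &mut T, y: &mut T) {
    // Stand-in for the vectorized copy-in-pieces path that Miri rejects;
    // the sketch just falls back to the simple path to stay self-contained.
    swap_simple(x, y);
}

fn main() {
    let mut a = [1u8; 64];
    let mut b = [2u8; 64];
    swap_buffers(&mut a, &mut b);
    assert_eq!(a, [2u8; 64]);
    assert_eq!(b, [1u8; 64]);
}
```

Using `cfg!(miri)` inside one function keeps both paths type-checked on every build; the alternative of two `#[cfg(...)]`-gated items trades that for not compiling the unused path at all.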
Diffstat (limited to 'src/test/codegen')
| -rw-r--r-- | src/test/codegen/swap-large-types.rs | 27 |
1 file changed, 27 insertions, 0 deletions
```diff
diff --git a/src/test/codegen/swap-large-types.rs b/src/test/codegen/swap-large-types.rs
index 535d301a3d2..91a1ab7144f 100644
--- a/src/test/codegen/swap-large-types.rs
+++ b/src/test/codegen/swap-large-types.rs
@@ -39,6 +39,9 @@ pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
     swap(x, y)
 }
 
+// Verify that types with usize alignment are swapped via vectored usizes,
+// not falling back to byte-level code.
+
 // CHECK-LABEL: @swap_slice
 #[no_mangle]
 pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
@@ -50,6 +53,8 @@ pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
     }
 }
 
+// But for a large align-1 type, vectorized byte copying is what we want.
+
 type OneKilobyteBuffer = [u8; 1024];
 
 // CHECK-LABEL: @swap_1kb_slices
@@ -62,3 +67,25 @@ pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer])
         x.swap_with_slice(y);
     }
 }
+
+// This verifies that the 2×read + 2×write optimizes to just 3 memcpys
+// for an unusual type like this. It's not clear whether we should do anything
+// smarter in Rust for these, so for now it's fine to leave these up to the backend.
+// That's not as bad as it might seem, as for example, LLVM will lower the
+// memcpys below to VMOVAPS on YMMs if one enables the AVX target feature.
+// Eventually we'll be able to pass `align_of::<T>` to a const generic and
+// thus pick a smarter chunk size ourselves without huge code duplication.
+
+#[repr(align(64))]
+pub struct BigButHighlyAligned([u8; 64 * 3]);
+
+// CHECK-LABEL: @swap_big_aligned
+#[no_mangle]
+pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
+// CHECK-NOT: call void @llvm.memcpy
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 64 dereferenceable(192)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 64 dereferenceable(192)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 64 dereferenceable(192)
+// CHECK-NOT: call void @llvm.memcpy
+    swap(x, y)
+}
```
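As background for the `swap_big_aligned` CHECK lines above: swapping a value of such a type is conceptually a read of `x` into a temporary, a copy of `y` over `x`, and a write of the temporary back into `y`, which is why exactly three `memcpy` calls are expected. A rough, self-contained illustration of that shape (not the actual `core::mem::swap` implementation):

```rust
// Rough illustration (not the actual `core::mem::swap` implementation) of why
// swapping a big, highly aligned value is "2×read + 2×write" that the backend
// can lower to three memcpys: x -> tmp, y -> x, tmp -> y.
use std::ptr;

#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 64 * 3]);

pub fn swap_by_copies(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
    unsafe {
        // memcpy #1: copy x into a temporary.
        let tmp = ptr::read(x);
        // memcpy #2: copy y over x.
        ptr::copy_nonoverlapping::<BigButHighlyAligned>(y, x, 1);
        // memcpy #3: copy the temporary into y.
        ptr::write(y, tmp);
    }
}

fn main() {
    let mut a = BigButHighlyAligned([1; 192]);
    let mut b = BigButHighlyAligned([2; 192]);
    swap_by_copies(&mut a, &mut b);
    assert!(a.0.iter().all(|&byte| byte == 2));
    assert!(b.0.iter().all(|&byte| byte == 1));
}
```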
