diff options
| author | Scott McMurray <scottmcm@users.noreply.github.com> | 2025-02-21 21:00:43 -0800 |
|---|---|---|
| committer | Scott McMurray <scottmcm@users.noreply.github.com> | 2025-04-09 09:09:37 -0700 |
| commit | 50d0ce1b42d67ce98b2ccac55d22a2ff8abe3273 (patch) | |
| tree | b372a833e17bf40219302ce5aa5d180ca42675f4 /tests | |
| parent | f06e5c1e35bc5bc6131c6f8a0eb782097e3f28c3 (diff) | |
| download | rust-50d0ce1b42d67ce98b2ccac55d22a2ff8abe3273.tar.gz rust-50d0ce1b42d67ce98b2ccac55d22a2ff8abe3273.zip | |
Ensure `swap_nonoverlapping` is really always untyped
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/assembly/x86_64-typed-swap.rs | 28 | ||||
| -rw-r--r-- | tests/codegen/simd/swap-simd-types.rs | 8 | ||||
| -rw-r--r-- | tests/codegen/swap-large-types.rs | 76 | ||||
| -rw-r--r-- | tests/codegen/swap-small-types.rs | 78 | ||||
| -rw-r--r-- | tests/ui/consts/missing_span_in_backtrace.stderr | 6 |
5 files changed, 148 insertions, 48 deletions
diff --git a/tests/assembly/x86_64-typed-swap.rs b/tests/assembly/x86_64-typed-swap.rs index dfd6ee565bc..a6753011d36 100644 --- a/tests/assembly/x86_64-typed-swap.rs +++ b/tests/assembly/x86_64-typed-swap.rs @@ -51,3 +51,31 @@ pub fn swap_simd(x: &mut __m128, y: &mut __m128) { // CHECK-NEXT: retq swap(x, y) } + +// CHECK-LABEL: swap_string: +#[no_mangle] +pub fn swap_string(x: &mut String, y: &mut String) { + // CHECK-NOT: mov + // CHECK-COUNT-4: movups + // CHECK-NOT: mov + // CHECK-COUNT-4: movq + // CHECK-NOT: mov + swap(x, y) +} + +// CHECK-LABEL: swap_44_bytes: +#[no_mangle] +pub fn swap_44_bytes(x: &mut [u8; 44], y: &mut [u8; 44]) { + // Ensure we do better than a long run of byte copies, + // see <https://github.com/rust-lang/rust/issues/134946> + + // CHECK-NOT: movb + // CHECK-COUNT-8: movups{{.+}}xmm + // CHECK-NOT: movb + // CHECK-COUNT-4: movq + // CHECK-NOT: movb + // CHECK-COUNT-4: movl + // CHECK-NOT: movb + // CHECK: retq + swap(x, y) +} diff --git a/tests/codegen/simd/swap-simd-types.rs b/tests/codegen/simd/swap-simd-types.rs index 69767d0a755..c063cc683a6 100644 --- a/tests/codegen/simd/swap-simd-types.rs +++ b/tests/codegen/simd/swap-simd-types.rs @@ -23,8 +23,8 @@ pub fn swap_single_m256(x: &mut __m256, y: &mut __m256) { #[no_mangle] pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) { // CHECK-NOT: alloca - // CHECK: load <8 x float>{{.+}}align 32 - // CHECK: store <8 x float>{{.+}}align 32 + // CHECK-COUNT-2: load <4 x i64>{{.+}}align 32 + // CHECK-COUNT-2: store <4 x i64>{{.+}}align 32 if x.len() == y.len() { x.swap_with_slice(y); } @@ -34,7 +34,7 @@ pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) { #[no_mangle] pub fn swap_bytes32(x: &mut [u8; 32], y: &mut [u8; 32]) { // CHECK-NOT: alloca - // CHECK: load <32 x i8>{{.+}}align 1 - // CHECK: store <32 x i8>{{.+}}align 1 + // CHECK-COUNT-2: load <4 x i64>{{.+}}align 1 + // CHECK-COUNT-2: store <4 x i64>{{.+}}align 1 swap(x, y) } diff --git a/tests/codegen/swap-large-types.rs b/tests/codegen/swap-large-types.rs index 49a41bb1469..08c486affd9 100644 --- a/tests/codegen/swap-large-types.rs +++ b/tests/codegen/swap-large-types.rs @@ -12,6 +12,16 @@ type KeccakBuffer = [[u64; 5]; 5]; // to stack for large types, which is completely unnecessary as the lack of // overlap means we can just do whatever fits in registers at a time. +// The tests here (after the first one showing that the problem still exists) +// are less about testing *exactly* what the codegen is, and more about testing +// 1) That things are swapped directly from one argument to the other, +// never going through stack along the way, and +// 2) That we're doing the swapping for big things using large vector types, +// rather then `i64` or `<8 x i8>` (or, even worse, `i8`) at a time. +// +// (There are separate tests for intrinsics::typed_swap_nonoverlapping that +// check that it, as an intrinsic, are emitting exactly what it should.) + // CHECK-LABEL: @swap_basic #[no_mangle] pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { @@ -26,55 +36,55 @@ pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { } } -// This test verifies that the library does something smarter, and thus -// doesn't need any scratch space on the stack. - // CHECK-LABEL: @swap_std #[no_mangle] pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { // CHECK-NOT: alloca - // CHECK: load <{{[0-9]+}} x i64> - // CHECK: store <{{[0-9]+}} x i64> + // CHECK: load <{{2|4}} x i64> + // CHECK: store <{{2|4}} x i64> swap(x, y) } -// Verify that types with usize alignment are swapped via vectored usizes, -// not falling back to byte-level code. - // CHECK-LABEL: @swap_slice #[no_mangle] pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) { // CHECK-NOT: alloca - // CHECK: load <{{[0-9]+}} x i64> - // CHECK: store <{{[0-9]+}} x i64> + // CHECK: load <{{2|4}} x i64> + // CHECK: store <{{2|4}} x i64> if x.len() == y.len() { x.swap_with_slice(y); } } -// But for a large align-1 type, vectorized byte copying is what we want. - type OneKilobyteBuffer = [u8; 1024]; // CHECK-LABEL: @swap_1kb_slices #[no_mangle] pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) { // CHECK-NOT: alloca - // CHECK: load <{{[0-9]+}} x i8> - // CHECK: store <{{[0-9]+}} x i8> + + // CHECK-NOT: load i32 + // CHECK-NOT: store i32 + // CHECK-NOT: load i16 + // CHECK-NOT: store i16 + // CHECK-NOT: load i8 + // CHECK-NOT: store i8 + + // CHECK: load <{{2|4}} x i64>{{.+}}align 1, + // CHECK: store <{{2|4}} x i64>{{.+}}align 1, + + // CHECK-NOT: load i32 + // CHECK-NOT: store i32 + // CHECK-NOT: load i16 + // CHECK-NOT: store i16 + // CHECK-NOT: load i8 + // CHECK-NOT: store i8 + if x.len() == y.len() { x.swap_with_slice(y); } } -// This verifies that the 2×read + 2×write optimizes to just 3 memcpys -// for an unusual type like this. It's not clear whether we should do anything -// smarter in Rust for these, so for now it's fine to leave these up to the backend. -// That's not as bad as it might seem, as for example, LLVM will lower the -// memcpys below to VMOVAPS on YMMs if one enables the AVX target feature. -// Eventually we'll be able to pass `align_of::<T>` to a const generic and -// thus pick a smarter chunk size ourselves without huge code duplication. - #[repr(align(64))] pub struct BigButHighlyAligned([u8; 64 * 3]); @@ -82,9 +92,25 @@ pub struct BigButHighlyAligned([u8; 64 * 3]); #[no_mangle] pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) { // CHECK-NOT: call void @llvm.memcpy - // CHECK: call void @llvm.memcpy.{{.+}}(ptr noundef nonnull align 64 dereferenceable(192) - // CHECK: call void @llvm.memcpy.{{.+}}(ptr noundef nonnull align 64 dereferenceable(192) - // CHECK: call void @llvm.memcpy.{{.+}}(ptr noundef nonnull align 64 dereferenceable(192) + // CHECK-NOT: load i32 + // CHECK-NOT: store i32 + // CHECK-NOT: load i16 + // CHECK-NOT: store i16 + // CHECK-NOT: load i8 + // CHECK-NOT: store i8 + + // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64, + // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64, + + // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32, + // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32, + + // CHECK-NOT: load i32 + // CHECK-NOT: store i32 + // CHECK-NOT: load i16 + // CHECK-NOT: store i16 + // CHECK-NOT: load i8 + // CHECK-NOT: store i8 // CHECK-NOT: call void @llvm.memcpy swap(x, y) } diff --git a/tests/codegen/swap-small-types.rs b/tests/codegen/swap-small-types.rs index 76bb853e642..ffa573c9a43 100644 --- a/tests/codegen/swap-small-types.rs +++ b/tests/codegen/swap-small-types.rs @@ -1,5 +1,6 @@ //@ compile-flags: -Copt-level=3 -Z merge-functions=disabled //@ only-x86_64 +//@ min-llvm-version: 20 #![crate_type = "lib"] @@ -27,13 +28,19 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) { pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) { // CHECK-NOT: alloca - // Whether `i8` is the best for this is unclear, but - // might as well record what's actually happening right now. - - // CHECK: load i8 - // CHECK: load i8 - // CHECK: store i8 - // CHECK: store i8 + // Swapping `i48` might be cleaner in LLVM-IR here, but `i32`+`i16` isn't bad, + // and is closer to the assembly it generates anyway. + + // CHECK-NOT: load{{ }} + // CHECK: load i32{{.+}}align 2 + // CHECK-NEXT: load i32{{.+}}align 2 + // CHECK-NEXT: store i32{{.+}}align 2 + // CHECK-NEXT: store i32{{.+}}align 2 + // CHECK: load i16{{.+}}align 2 + // CHECK-NEXT: load i16{{.+}}align 2 + // CHECK-NEXT: store i16{{.+}}align 2 + // CHECK-NEXT: store i16{{.+}}align 2 + // CHECK-NOT: store{{ }} swap(x, y) } @@ -76,30 +83,49 @@ pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) { swap(x, y) } -// LLVM doesn't vectorize a loop over 3-byte elements, -// so we chunk it down to bytes and loop over those instead. type RGB24 = [u8; 3]; // CHECK-LABEL: @swap_rgb24_slices #[no_mangle] pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) { // CHECK-NOT: alloca - // CHECK: load <{{[0-9]+}} x i8> - // CHECK: store <{{[0-9]+}} x i8> + + // CHECK: mul nuw nsw i64 %{{x|y}}.1, 3 + + // CHECK: load <{{[0-9]+}} x i64> + // CHECK: store <{{[0-9]+}} x i64> + + // CHECK-COUNT-2: load i32 + // CHECK-COUNT-2: store i32 + // CHECK-COUNT-2: load i16 + // CHECK-COUNT-2: store i16 + // CHECK-COUNT-2: load i8 + // CHECK-COUNT-2: store i8 if x.len() == y.len() { x.swap_with_slice(y); } } -// This one has a power-of-two size, so we iterate over it directly type RGBA32 = [u8; 4]; // CHECK-LABEL: @swap_rgba32_slices #[no_mangle] pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) { // CHECK-NOT: alloca - // CHECK: load <{{[0-9]+}} x i32> - // CHECK: store <{{[0-9]+}} x i32> + + // Because the size in bytes in a multiple of 4, we can skip the smallest sizes. + + // CHECK: load <{{[0-9]+}} x i64> + // CHECK: store <{{[0-9]+}} x i64> + + // CHECK-COUNT-2: load i32 + // CHECK-COUNT-2: store i32 + + // CHECK-NOT: load i16 + // CHECK-NOT: store i16 + // CHECK-NOT: load i8 + // CHECK-NOT: store i8 + if x.len() == y.len() { x.swap_with_slice(y); } @@ -113,8 +139,8 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two()); #[no_mangle] pub fn swap_string_slices(x: &mut [String], y: &mut [String]) { // CHECK-NOT: alloca - // CHECK: load <{{[0-9]+}} x i64> - // CHECK: store <{{[0-9]+}} x i64> + // CHECK: load <{{[0-9]+}} x i64>{{.+}}, align 8, + // CHECK: store <{{[0-9]+}} x i64>{{.+}}, align 8, if x.len() == y.len() { x.swap_with_slice(y); } @@ -130,6 +156,26 @@ pub struct Packed { #[no_mangle] pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) { // CHECK-NOT: alloca + + // CHECK-NOT: load + // CHECK-NOT: store + + // CHECK: %[[A:.+]] = load i64, ptr %x, align 1, + // CHECK-NEXT: %[[B:.+]] = load i64, ptr %y, align 1, + // CHECK-NEXT: store i64 %[[B]], ptr %x, align 1, + // CHECK-NEXT: store i64 %[[A]], ptr %y, align 1, + + // CHECK-NOT: load + // CHECK-NOT: store + + // CHECK: %[[C:.+]] = load i8, ptr %[[X8:.+]], align 1, + // CHECK-NEXT: %[[D:.+]] = load i8, ptr %[[Y8:.+]], align 1, + // CHECK-NEXT: store i8 %[[D]], ptr %[[X8]], align 1, + // CHECK-NEXT: store i8 %[[C]], ptr %[[Y8]], align 1, + + // CHECK-NOT: load + // CHECK-NOT: store + // CHECK: ret void swap(x, y) } diff --git a/tests/ui/consts/missing_span_in_backtrace.stderr b/tests/ui/consts/missing_span_in_backtrace.stderr index 2f3a65302bd..aad3d76dd26 100644 --- a/tests/ui/consts/missing_span_in_backtrace.stderr +++ b/tests/ui/consts/missing_span_in_backtrace.stderr @@ -12,10 +12,10 @@ note: inside `swap_nonoverlapping::<MaybeUninit<u8>>` --> $SRC_DIR/core/src/ptr/mod.rs:LL:COL note: inside `swap_nonoverlapping::compiletime::<MaybeUninit<u8>>` --> $SRC_DIR/core/src/ptr/mod.rs:LL:COL -note: inside `std::ptr::swap_nonoverlapping_simple_untyped::<MaybeUninit<u8>>` - --> $SRC_DIR/core/src/ptr/mod.rs:LL:COL -note: inside `std::ptr::read::<MaybeUninit<MaybeUninit<u8>>>` +note: inside `std::ptr::swap_nonoverlapping_const::<MaybeUninit<u8>>` --> $SRC_DIR/core/src/ptr/mod.rs:LL:COL +note: inside `copy_nonoverlapping::<MaybeUninit<u8>>` + --> $SRC_DIR/core/src/intrinsics/mod.rs:LL:COL = help: this code performed an operation that depends on the underlying bytes representing a pointer = help: the absolute address of a pointer is not known at compile-time, so such operations are not supported = note: this error originates in the macro `$crate::intrinsics::const_eval_select` which comes from the expansion of the macro `const_eval_select` (in Nightly builds, run with -Z macro-backtrace for more info) |
