about summary refs log tree commit diff
path: root/src/test/codegen
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2022-02-24 22:29:14 +0000
committerbors <bors@rust-lang.org>2022-02-24 22:29:14 +0000
commit4e82f35492ea5c78e19609bf4468f0a686d9a756 (patch)
tree23fbd4cd23c587684432375a288e303b80be8051 /src/test/codegen
parent4b043faba34ccc053a4d0110634c323f6c03765e (diff)
parent3bd163f4e8422db4c0de384b2b21bfaaecd2e5c1 (diff)
downloadrust-4e82f35492ea5c78e19609bf4468f0a686d9a756.tar.gz
rust-4e82f35492ea5c78e19609bf4468f0a686d9a756.zip
Auto merge of #94333 - Dylan-DPC:rollup-7yxtywp, r=Dylan-DPC
Rollup of 9 pull requests

Successful merges:

 - #91795 (resolve/metadata: Stop encoding macros as reexports)
 - #93714 (better ObligationCause for normalization errors in `can_type_implement_copy`)
 - #94175 (Improve `--check-cfg` implementation)
 - #94212 (Stop manually SIMDing in `swap_nonoverlapping`)
 - #94242 (properly handle fat pointers to uninhabitable types)
 - #94308 (Normalize main return type during mono item collection & codegen)
 - #94315 (update auto trait lint for `PhantomData`)
 - #94316 (Improve string literal unescaping)
 - #94327 (Avoid emitting full macro body into JSON errors)

Failed merges:

r? `@ghost`
`@rustbot` modify labels: rollup
Diffstat (limited to 'src/test/codegen')
-rw-r--r--src/test/codegen/swap-large-types.rs64
-rw-r--r--src/test/codegen/swap-simd-types.rs32
-rw-r--r--src/test/codegen/swap-small-types.rs44
3 files changed, 140 insertions, 0 deletions
diff --git a/src/test/codegen/swap-large-types.rs b/src/test/codegen/swap-large-types.rs
new file mode 100644
index 00000000000..535d301a3d2
--- /dev/null
+++ b/src/test/codegen/swap-large-types.rs
@@ -0,0 +1,64 @@
+// compile-flags: -O
+// only-x86_64
+// ignore-debug: the debug assertions get in the way
+
+#![crate_type = "lib"]
+
+use std::mem::swap;
+use std::ptr::{read, copy_nonoverlapping, write};
+
+type KeccakBuffer = [[u64; 5]; 5];
+
+// A basic read+copy+write swap implementation ends up copying one of the values
+// to stack for large types, which is completely unnecessary as the lack of
+// overlap means we can just do whatever fits in registers at a time.
+
+// CHECK-LABEL: @swap_basic
+#[no_mangle]
+pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
+// CHECK: alloca [5 x [5 x i64]]
+
+    // SAFETY: exclusive references are always valid to read/write,
+    // are non-overlapping, and nothing here panics so it's drop-safe.
+    unsafe {
+        let z = read(x);
+        copy_nonoverlapping(y, x, 1);
+        write(y, z);
+    }
+}
+
+// This test verifies that the library does something smarter, and thus
+// doesn't need any scratch space on the stack.
+
+// CHECK-LABEL: @swap_std
+#[no_mangle]
+pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
+// CHECK-NOT: alloca
+// CHECK: load <{{[0-9]+}} x i64>
+// CHECK: store <{{[0-9]+}} x i64>
+    swap(x, y)
+}
+
+// CHECK-LABEL: @swap_slice
+#[no_mangle]
+pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
+// CHECK-NOT: alloca
+// CHECK: load <{{[0-9]+}} x i64>
+// CHECK: store <{{[0-9]+}} x i64>
+    if x.len() == y.len() {
+        x.swap_with_slice(y);
+    }
+}
+
+type OneKilobyteBuffer = [u8; 1024];
+
+// CHECK-LABEL: @swap_1kb_slices
+#[no_mangle]
+pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
+// CHECK-NOT: alloca
+// CHECK: load <{{[0-9]+}} x i8>
+// CHECK: store <{{[0-9]+}} x i8>
+    if x.len() == y.len() {
+        x.swap_with_slice(y);
+    }
+}
diff --git a/src/test/codegen/swap-simd-types.rs b/src/test/codegen/swap-simd-types.rs
new file mode 100644
index 00000000000..c90b277eb44
--- /dev/null
+++ b/src/test/codegen/swap-simd-types.rs
@@ -0,0 +1,32 @@
+// compile-flags: -O -C target-feature=+avx
+// only-x86_64
+// ignore-debug: the debug assertions get in the way
+
+#![crate_type = "lib"]
+
+use std::mem::swap;
+
+// SIMD types are highly-aligned already, so make sure the swap code leaves their
+// types alone and doesn't pessimize them (such as by swapping them as `usize`s).
+extern crate core;
+use core::arch::x86_64::__m256;
+
+// CHECK-LABEL: @swap_single_m256
+#[no_mangle]
+pub fn swap_single_m256(x: &mut __m256, y: &mut __m256) {
+// CHECK-NOT: alloca
+// CHECK: load <8 x float>{{.+}}align 32
+// CHECK: store <8 x float>{{.+}}align 32
+    swap(x, y)
+}
+
+// CHECK-LABEL: @swap_m256_slice
+#[no_mangle]
+pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) {
+// CHECK-NOT: alloca
+// CHECK: load <8 x float>{{.+}}align 32
+// CHECK: store <8 x float>{{.+}}align 32
+    if x.len() == y.len() {
+        x.swap_with_slice(y);
+    }
+}
diff --git a/src/test/codegen/swap-small-types.rs b/src/test/codegen/swap-small-types.rs
index 6205e6a6559..2f375844cc7 100644
--- a/src/test/codegen/swap-small-types.rs
+++ b/src/test/codegen/swap-small-types.rs
@@ -16,3 +16,47 @@ pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
 // CHECK: store i48
     swap(x, y)
 }
+
+// LLVM doesn't vectorize a loop over 3-byte elements,
+// so we chunk it down to bytes and loop over those instead.
+type RGB24 = [u8; 3];
+
+// CHECK-LABEL: @swap_rgb24_slices
+#[no_mangle]
+pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
+// CHECK-NOT: alloca
+// CHECK: load <{{[0-9]+}} x i8>
+// CHECK: store <{{[0-9]+}} x i8>
+    if x.len() == y.len() {
+        x.swap_with_slice(y);
+    }
+}
+
+// This one has a power-of-two size, so we iterate over it directly
+type RGBA32 = [u8; 4];
+
+// CHECK-LABEL: @swap_rgba32_slices
+#[no_mangle]
+pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
+// CHECK-NOT: alloca
+// CHECK: load <{{[0-9]+}} x i32>
+// CHECK: store <{{[0-9]+}} x i32>
+    if x.len() == y.len() {
+        x.swap_with_slice(y);
+    }
+}
+
+// Strings have a non-power-of-two size, but have pointer alignment,
+// so we swap usizes instead of dropping all the way down to bytes.
+const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
+
+// CHECK-LABEL: @swap_string_slices
+#[no_mangle]
+pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
+// CHECK-NOT: alloca
+// CHECK: load <{{[0-9]+}} x i64>
+// CHECK: store <{{[0-9]+}} x i64>
+    if x.len() == y.len() {
+        x.swap_with_slice(y);
+    }
+}