about summary refs log tree commit diff
path: root/src/libcore/ptr.rs
diff options
context:
space:
mode:
authorScott McMurray <scottmcm@users.noreply.github.com>2018-07-04 02:48:30 -0700
committerScott McMurray <scottmcm@users.noreply.github.com>2018-07-21 21:43:37 -0700
commite6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb (patch)
tree170eec70a297f7237aee84232ab1572f62706113 /src/libcore/ptr.rs
parent874dec25ed4c08d36f17d396b6872ca50313fc8e (diff)
downloadrust-e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb.tar.gz
rust-e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb.zip
Don't use SIMD in mem::swap for types smaller than the block size
LLVM isn't able to remove the alloca for the unaligned block in the SIMD tail in some cases, so doing this helps SRoA (scalar replacement of aggregates) work in cases where it currently doesn't.  Found in the `replace_with` RFC discussion.
Diffstat (limited to 'src/libcore/ptr.rs')
-rw-r--r--src/libcore/ptr.rs13
1 file changed, 13 insertions, 0 deletions
diff --git a/src/libcore/ptr.rs b/src/libcore/ptr.rs
index 0af642258c2..f1405b58e1b 100644
--- a/src/libcore/ptr.rs
+++ b/src/libcore/ptr.rs
@@ -188,6 +188,19 @@ pub unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
 }
 
 #[inline]
+// Swap the single values behind `x` and `y`.
+//
+// SAFETY: callers must uphold the same contract as `swap_nonoverlapping`
+// with `count == 1`: both pointers valid for reads and writes, properly
+// aligned, and non-overlapping. (Presumed from the delegation below —
+// confirm against `swap_nonoverlapping`'s documented contract.)
+pub(crate) unsafe fn swap_nonoverlapping_one<T>(x: *mut T, y: *mut T) {
+    // For types smaller than the block optimization below,
+    // just swap directly to avoid pessimizing codegen.
+    // (The block swap operates on 32-byte chunks; for smaller `T`, LLVM
+    // may fail to remove the alloca for the unaligned tail, which defeats
+    // SRoA — see this commit's message.)
+    if mem::size_of::<T>() < 32 {
+        // Plain read / copy / write: a temporary of `z`, then two moves.
+        let z = read(x);
+        copy_nonoverlapping(y, x, 1);
+        write(y, z);
+    } else {
+        // Large types still benefit from the block-swap implementation.
+        swap_nonoverlapping(x, y, 1);
+    }
+}
+
+#[inline]
 unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, len: usize) {
     // The approach here is to utilize simd to swap x & y efficiently. Testing reveals
     // that swapping either 32 bytes or 64 bytes at a time is most efficient for intel