Don't use SIMD in mem::swap for types smaller than the block size

LLVM isn't able to remove the alloca for the unaligned block in the SIMD tail in some cases, so swapping small types directly (without the SIMD path) lets SROA (scalar replacement of aggregates) succeed in cases where it currently doesn't. Found in the `replace_with` RFC discussion.
author: Scott McMurray <scottmcm@users.noreply.github.com> 2018-07-04 02:48:30 -0700
committer: Scott McMurray <scottmcm@users.noreply.github.com> 2018-07-21 21:43:37 -0700
commit: e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb (patch)
tree: 170eec70a297f7237aee84232ab1572f62706113 /src
parent: 874dec25ed4c08d36f17d396b6872ca50313fc8e (diff)
download: rust-e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb.tar.gz
rust-e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb.zip
3 files changed, 40 insertions, 1 deletions
diff --git a/src/libcore/mem.rs b/src/libcore/mem.rs
index 8fb4e0d6a02..a0fe6e98806 100644
--- a/src/libcore/mem.rs
+++ b/src/libcore/mem.rs
@@ -638,7 +638,7 @@ pub unsafe fn uninitialized<T>() -> T {
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn swap<T>(x: &mut T, y: &mut T) {
     unsafe {
-        ptr::swap_nonoverlapping(x, y, 1);
+        ptr::swap_nonoverlapping_one(x, y);
     }
 }
 
diff --git a/src/libcore/ptr.rs b/src/libcore/ptr.rs
index 0af642258c2..f1405b58e1b 100644
--- a/src/libcore/ptr.rs
+++ b/src/libcore/ptr.rs
@@ -188,6 +188,19 @@ pub unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
 }
 
 #[inline]
+pub(crate) unsafe fn swap_nonoverlapping_one<T>(x: *mut T, y: *mut T) {
+    // For types smaller than the 32-byte block that the SIMD path in
+    // `swap_nonoverlapping_bytes` works in, swap directly to avoid pessimizing codegen.
+    if mem::size_of::<T>() < 32 {
+        let z = read(x); // hold the old `*x` in a temporary
+        copy_nonoverlapping(y, x, 1); // `*x` <- `*y`
+        write(y, z); // `*y` <- old `*x`
+    } else {
+        swap_nonoverlapping(x, y, 1);
+    }
+}
+
+#[inline]
 unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, len: usize) {
     // The approach here is to utilize simd to swap x & y efficiently. Testing reveals
     // that swapping either 32 bytes or 64 bytes at a time is most efficient for intel
diff --git a/src/test/codegen/swap-small-types.rs b/src/test/codegen/swap-small-types.rs
new file mode 100644
index 00000000000..f34a1d669bd
--- /dev/null
+++ b/src/test/codegen/swap-small-types.rs
@@ -0,0 +1,26 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -O
+
+#![crate_type = "lib"]
+
+use std::mem::swap;
+
+type RGB48 = [u16; 3];
+
+// CHECK-LABEL: @swap_rgb48
+#[no_mangle]
+pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
+// CHECK-NOT: alloca
+// CHECK: load i48
+// CHECK: store i48
+    swap(x, y)
+}
author	Scott McMurray <scottmcm@users.noreply.github.com>	2018-07-04 02:48:30 -0700
committer	Scott McMurray <scottmcm@users.noreply.github.com>	2018-07-21 21:43:37 -0700
commit	e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb (patch)
tree	170eec70a297f7237aee84232ab1572f62706113 /src
parent	874dec25ed4c08d36f17d396b6872ca50313fc8e (diff)
download	rust-e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb.tar.gz rust-e6fc62a1ef6cfb545d4f33914a4440c6bbcbf9eb.zip