about summary refs log tree commit diff
path: root/src/libcore
diff options
context:
space:
mode:
author    Djzin <djzin@users.noreply.github.com> 2017-05-07 20:26:19 +0100
committer Djzin <djzin@users.noreply.github.com> 2017-05-07 20:26:19 +0100
commit    165f3668d602c4eaa02125cf86fb8d12719cb441 (patch)
tree      6d050fccd4dbadfa4defe8a01bc35b708c821e34 /src/libcore
parent    c6ca81aa921415a33e1c8f32e0c53a2c5eb6b485 (diff)
download  rust-165f3668d602c4eaa02125cf86fb8d12719cb441.tar.gz
download  rust-165f3668d602c4eaa02125cf86fb8d12719cb441.zip
optimize out stack alignment for sizes < 32
Diffstat (limited to 'src/libcore')
-rw-r--r--  src/libcore/mem.rs  17
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/libcore/mem.rs b/src/libcore/mem.rs
index e51976c4845..2dc1f0e04bc 100644
--- a/src/libcore/mem.rs
+++ b/src/libcore/mem.rs
@@ -453,15 +453,13 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
         // #[repr(simd)], even if we don't actually use this struct directly.
         #[repr(simd)]
         struct Block(u64, u64, u64, u64);
-        let block_size = size_of::<Block>();
+        struct UnalignedBlock(u64, u64, u64, u64);
 
-        // Create some uninitialized memory as scratch space
-        let mut t: Block = uninitialized();
+        let block_size = size_of::<Block>();
 
-        // Get raw pointers to the bytes of x, y & t for easier manipulation
+        // Get raw pointers to the bytes of x & y for easier manipulation
         let x = x as *mut T as *mut u8;
         let y = y as *mut T as *mut u8;
-        let t = &mut t as *mut _ as *mut u8;
 
         // Loop through x & y, copying them `Block` at a time
         // The optimizer should unroll the loop fully for most types
@@ -469,6 +467,12 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
         let len = size_of::<T>() as isize;
         let mut i = 0;
         while i + block_size as isize <= len {
+            // Create some uninitialized memory as scratch space
+            // Moving the declaration of `t` here avoids aligning the stack when
+            // this loop is unused
+            let mut t: Block = uninitialized();
+            let t = &mut t as *mut _ as *mut u8;
+
             // Swap a block of bytes of x & y, using t as a temporary buffer
             // This should be optimized into efficient SIMD operations where available
             ptr::copy_nonoverlapping(x.offset(i), t, block_size);
@@ -478,6 +482,9 @@ pub fn swap<T>(x: &mut T, y: &mut T) {
         }
         if i < len {
             // Swap any remaining bytes
+            let mut t: UnalignedBlock = uninitialized();
+            let t = &mut t as *mut _ as *mut u8;
+
             let rem = (len - i) as usize;
             ptr::copy_nonoverlapping(x.offset(i), t, rem);
             ptr::copy_nonoverlapping(y.offset(i), x.offset(i), rem);