author    Lukas Bergdoll <lukas.bergdoll@gmail.com>  2023-05-16 18:38:32 +0200
committer Lukas Bergdoll <lukas.bergdoll@gmail.com>  2023-05-16 18:38:32 +0200
commit    42655ff03b5c3bb5bd8484e7b1b65d5990822d5b
tree      39cb5525d5d0557dbcc562e955c67aa06b05e2f5
parent    a673ad6b5746a6256cb898edb8b888163df1872c
Use code with reliable branchless code-gen for slice::sort merge
The recent LLVM 16 update changed code-gen in the merge function of the
slice::sort implementation so that it is no longer branchless. Switching to
code with reliable branchless code-gen improves performance by 30% for random
patterns, restoring it to the level achieved with LLVM 15.
-rw-r--r--  library/core/src/slice/sort.rs | 38
1 file changed, 12 insertions(+), 26 deletions(-)
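For context, the pattern the patch moves to can be illustrated outside the
unsafe pointer code in the diff below. This is a minimal, hypothetical safe-Rust
sketch (merge_branchless is not a standard-library function); it shows the same
idea: evaluate the comparison once into a bool and derive both the copied
element and the index increments from it, a shape that tends to compile to
conditional moves rather than a data-dependent branch.

    // Hypothetical illustration of the branchless merge step; the real
    // slice::sort merge works in place on raw pointers inside a drop guard.
    fn merge_branchless<T: Copy + Ord>(left: &[T], right: &[T], out: &mut Vec<T>) {
        let (mut l, mut r) = (0, 0);
        while l < left.len() && r < right.len() {
            // One comparison drives both the element choice and the index bumps,
            // so the loop body has no data-dependent branch.
            let is_l = right[r] < left[l];
            let to_copy = if is_l { right[r] } else { left[l] };
            out.push(to_copy);
            r += is_l as usize;
            l += !is_l as usize;
        }
        // Whichever run was not exhausted is copied over verbatim.
        out.extend_from_slice(&left[l..]);
        out.extend_from_slice(&right[r..]);
    }

    // Example: merge_branchless(&[1, 3, 5], &[2, 2, 6], &mut out) yields
    // [1, 2, 2, 3, 5, 6].

Note the direction of the comparison: taking from the right run only when its
element is strictly less than the left one means equal elements come from the
left run first, keeping the merge stable, just as in the patched code.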
diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs
index e6e3b55efa9..eb8595ca90d 100644
--- a/library/core/src/slice/sort.rs
+++ b/library/core/src/slice/sort.rs
@@ -1085,12 +1085,12 @@ where
 
             // SAFETY: left and right must be valid and part of v same for out.
             unsafe {
-                let to_copy = if is_less(&*right, &**left) {
-                    get_and_increment(&mut right)
-                } else {
-                    get_and_increment(left)
-                };
-                ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
+                let is_l = is_less(&*right, &**left);
+                let to_copy = if is_l { right } else { *left };
+                ptr::copy_nonoverlapping(to_copy, *out, 1);
+                *out = out.add(1);
+                right = right.add(is_l as usize);
+                *left = left.add(!is_l as usize);
             }
         }
     } else {
@@ -1113,32 +1113,18 @@ where
 
             // SAFETY: left and right must be valid and part of v same for out.
             unsafe {
-                let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
-                    decrement_and_get(left)
-                } else {
-                    decrement_and_get(right)
-                };
-                ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
+                let is_l = is_less(&*right.sub(1), &*left.sub(1));
+                *left = left.sub(is_l as usize);
+                *right = right.sub(!is_l as usize);
+                let to_copy = if is_l { *left } else { *right };
+                out = out.sub(1);
+                ptr::copy_nonoverlapping(to_copy, out, 1);
             }
         }
     }
     // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
     // it will now be copied into the hole in `v`.
 
-    unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
-        let old = *ptr;
-
-        // SAFETY: ptr.add(1) must still be a valid pointer and part of `v`.
-        *ptr = unsafe { ptr.add(1) };
-        old
-    }
-
-    unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
-        // SAFETY: ptr.sub(1) must still be a valid pointer and part of `v`.
-        *ptr = unsafe { ptr.sub(1) };
-        *ptr
-    }
-
     // When dropped, copies the range `start..end` into `dest..`.
     struct MergeHole<T> {
         start: *mut T,