about summary refs log tree commit diff
diff options
context:
space:
mode:
authorLukas Bergdoll <lukas.bergdoll@gmail.com>2024-08-29 10:32:59 +0200
committerLukas Bergdoll <lukas.bergdoll@gmail.com>2024-08-29 10:32:59 +0200
commit00eca77b7230732f0ba9a88b434022177de7da17 (patch)
tree2159d18f4e077489e31d7c17bdb9c14991ebc9f3
parent7815d77ee002facace45d6747e6d9b3686d55097 (diff)
downloadrust-00eca77b7230732f0ba9a88b434022177de7da17.tar.gz
rust-00eca77b7230732f0ba9a88b434022177de7da17.zip
Use simpler branchy swap logic in tiny merge sort
Avoids the code duplication issue and results in
smaller binary size, which after all is the
purpose of the feature.
-rw-r--r--library/core/src/slice/sort/shared/smallsort.rs5
-rw-r--r--library/core/src/slice/sort/stable/tiny.rs50
2 files changed, 13 insertions, 42 deletions
diff --git a/library/core/src/slice/sort/shared/smallsort.rs b/library/core/src/slice/sort/shared/smallsort.rs
index 5027962ccb4..aaa9f9f1c7f 100644
--- a/library/core/src/slice/sort/shared/smallsort.rs
+++ b/library/core/src/slice/sort/shared/smallsort.rs
@@ -378,6 +378,11 @@ where
 
 /// Swap two values in the slice pointed to by `v_base` at the position `a_pos` and `b_pos` if the
 /// value at position `b_pos` is less than the one at position `a_pos`.
+///
+/// Purposefully not marked `#[inline]`, despite us wanting it to be inlined for integers like
+/// types. `is_less` could be a huge function and we want to give the compiler an option to
+/// not inline this function. For the same reasons that this function is very perf critical
+/// it should be in the same module as the functions that use it.
 unsafe fn swap_if_less<T, F>(v_base: *mut T, a_pos: usize, b_pos: usize, is_less: &mut F)
 where
     F: FnMut(&T, &T) -> bool,
diff --git a/library/core/src/slice/sort/stable/tiny.rs b/library/core/src/slice/sort/stable/tiny.rs
index 307011f3ee8..071ab8e107f 100644
--- a/library/core/src/slice/sort/stable/tiny.rs
+++ b/library/core/src/slice/sort/stable/tiny.rs
@@ -1,6 +1,6 @@
 //! Binary-size optimized mergesort inspired by https://github.com/voultapher/tiny-sort-rs.
 
-use crate::mem::{ManuallyDrop, MaybeUninit};
+use crate::mem::MaybeUninit;
 use crate::ptr;
 use crate::slice::sort::stable::merge;
 
@@ -27,49 +27,15 @@ pub fn mergesort<T, F: FnMut(&T, &T) -> bool>(
 
         merge::merge(v, scratch, mid, is_less);
     } else if len == 2 {
-        // Branchless swap the two elements. This reduces the recursion depth and improves
-        // perf significantly at a small binary-size cost. Trades ~10% perf boost for integers
-        // for ~50 bytes in the binary.
-
         // SAFETY: We checked the len, the pointers we create are valid and don't overlap.
         unsafe {
-            swap_if_less(v.as_mut_ptr(), 0, 1, is_less);
-        }
-    }
-}
-
-/// Swap two values in the slice pointed to by `v_base` at the position `a_pos` and `b_pos` if the
-/// value at position `b_pos` is less than the one at position `a_pos`.
-unsafe fn swap_if_less<T, F>(v_base: *mut T, a_pos: usize, b_pos: usize, is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    // SAFETY: the caller must guarantee that `a` and `b` each added to `v_base` yield valid
-    // pointers into `v_base`, and are properly aligned, and part of the same allocation.
-    unsafe {
-        let v_a = v_base.add(a_pos);
-        let v_b = v_base.add(b_pos);
+            let v_base = v.as_mut_ptr();
+            let v_a = v_base;
+            let v_b = v_base.add(1);
 
-        // PANIC SAFETY: if is_less panics, no scratch memory was created and the slice should still be
-        // in a well defined state, without duplicates.
-
-        // Important to only swap if it is more and not if it is equal. is_less should return false for
-        // equal, so we don't swap.
-        let should_swap = is_less(&*v_b, &*v_a);
-
-        // This is a branchless version of swap if.
-        // The equivalent code with a branch would be:
-        //
-        // if should_swap {
-        //     ptr::swap(left, right, 1);
-        // }
-
-        // The goal is to generate cmov instructions here.
-        let left_swap = if should_swap { v_b } else { v_a };
-        let right_swap = if should_swap { v_a } else { v_b };
-
-        let right_swap_tmp = ManuallyDrop::new(ptr::read(right_swap));
-        ptr::copy(left_swap, v_a, 1);
-        ptr::copy_nonoverlapping(&*right_swap_tmp, v_b, 1);
+            if is_less(&*v_b, &*v_a) {
+                ptr::swap_nonoverlapping(v_a, v_b, 1);
+            }
+        }
     }
 }