about summary refs log tree commit diff
diff options
context:
space:
mode:
author    Lukas Bergdoll <lukas.bergdoll@gmail.com>  2024-08-26 11:17:38 +0200
committer Lukas Bergdoll <lukas.bergdoll@gmail.com>  2024-08-26 11:17:38 +0200
commit    89c9a2908fc371330edc627b9178db0628c3aee0 (patch)
tree      05379b9fc840b5ea2a1de4db38830657423ef6c3
parent    13d7b546dada1cb2960dec90875b16a42ff7fb84 (diff)
download  rust-89c9a2908fc371330edc627b9178db0628c3aee0.tar.gz
          rust-89c9a2908fc371330edc627b9178db0628c3aee0.zip
Reduce code duplication by moving partition_lomuto_branchless_simple into quicksort module
-rw-r--r--  library/core/src/slice/sort/select.rs             78
-rw-r--r--  library/core/src/slice/sort/unstable/mod.rs        1
-rw-r--r--  library/core/src/slice/sort/unstable/quicksort.rs  39
3 files changed, 39 insertions, 79 deletions
diff --git a/library/core/src/slice/sort/select.rs b/library/core/src/slice/sort/select.rs
index b60e33b3a2f..2b3e1475530 100644
--- a/library/core/src/slice/sort/select.rs
+++ b/library/core/src/slice/sort/select.rs
@@ -6,13 +6,11 @@
 //! for pivot selection. Using this as a fallback ensures O(n) worst case running time with
 //! better performance than one would get using heapsort as fallback.
 
-use crate::intrinsics;
 use crate::mem::{self, SizedTypeProperties};
 #[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::shared::pivot::choose_pivot;
 #[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
-#[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::unstable::quicksort::partition;
 
 /// Reorders the slice such that the element at `index` is at its final sorted position.
@@ -252,15 +250,7 @@ fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F)
 
     median_of_medians(&mut v[lo..lo + frac], is_less, pivot);
 
-    #[cfg(not(feature = "optimize_for_size"))]
-    {
-        partition(v, lo + pivot, is_less)
-    }
-
-    #[cfg(feature = "optimize_for_size")]
-    {
-        partition_size_opt(v, lo + pivot, is_less)
-    }
+    partition(v, lo + pivot, is_less)
 }
 
 /// Moves around the 9 elements at the indices a..i, such that
@@ -351,69 +341,3 @@ fn bubble_sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
         n -= 1;
     }
 }
-
-#[cfg(feature = "optimize_for_size")]
-fn partition_size_opt<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    let len = v.len();
-
-    // Allows for panic-free code-gen by proving this property to the compiler.
-    if len == 0 {
-        return 0;
-    }
-
-    if pivot >= len {
-        intrinsics::abort();
-    }
-
-    // SAFETY: We checked that `pivot` is in-bounds.
-    unsafe {
-        // Place the pivot at the beginning of slice.
-        v.swap_unchecked(0, pivot);
-    }
-    let (pivot, v_without_pivot) = v.split_at_mut(1);
-
-    // Assuming that Rust generates noalias LLVM IR we can be sure that a partition function
-    // signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't alias.
-    // Having this guarantee is crucial for optimizations. It's possible to copy the pivot value
-    // into a stack value, but this creates issues for types with interior mutability mandating
-    // a drop guard.
-    let pivot = &mut pivot[0];
-
-    let num_lt = partition_lomuto_branchless_simple(v_without_pivot, pivot, is_less);
-
-    if num_lt >= len {
-        intrinsics::abort();
-    }
-
-    // SAFETY: We checked that `num_lt` is in-bounds.
-    unsafe {
-        // Place the pivot between the two partitions.
-        v.swap_unchecked(0, num_lt);
-    }
-
-    num_lt
-}
-
-#[cfg(feature = "optimize_for_size")]
-fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
-    v: &mut [T],
-    pivot: &T,
-    is_less: &mut F,
-) -> usize {
-    let mut left = 0;
-
-    for right in 0..v.len() {
-        // SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that
-        // left <= right and that both are in-bounds.
-        unsafe {
-            let right_is_lt = is_less(v.get_unchecked(right), pivot);
-            v.swap_unchecked(left, right);
-            left += right_is_lt as usize;
-        }
-    }
-
-    left
-}
diff --git a/library/core/src/slice/sort/unstable/mod.rs b/library/core/src/slice/sort/unstable/mod.rs
index 43b5cc79fc7..faac97eab02 100644
--- a/library/core/src/slice/sort/unstable/mod.rs
+++ b/library/core/src/slice/sort/unstable/mod.rs
@@ -8,7 +8,6 @@ use crate::slice::sort::shared::find_existing_run;
 use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
 
 pub(crate) mod heapsort;
-#[cfg(not(feature = "optimize_for_size"))]
 pub(crate) mod quicksort;
 
 /// Unstable sort called ipnsort by Lukas Bergdoll and Orson Peters.
diff --git a/library/core/src/slice/sort/unstable/quicksort.rs b/library/core/src/slice/sort/unstable/quicksort.rs
index e55ebe67e13..83751d99cc2 100644
--- a/library/core/src/slice/sort/unstable/quicksort.rs
+++ b/library/core/src/slice/sort/unstable/quicksort.rs
@@ -1,7 +1,9 @@
 //! This module contains an unstable quicksort and two partition implementations.
 
 use crate::mem::{self, ManuallyDrop};
+#[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::shared::pivot::choose_pivot;
+#[cfg(not(feature = "optimize_for_size"))]
 use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl;
 use crate::{intrinsics, ptr};
 
@@ -11,6 +13,7 @@ use crate::{intrinsics, ptr};
 ///
 /// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
 /// this function will immediately switch to heapsort.
+#[cfg(not(feature = "optimize_for_size"))]
 pub(crate) fn quicksort<'a, T, F>(
     mut v: &'a mut [T],
     mut ancestor_pivot: Option<&'a T>,
@@ -138,7 +141,16 @@ const fn inst_partition<T, F: FnMut(&T, &T) -> bool>() -> fn(&mut [T], &T, &mut
     if mem::size_of::<T>() <= MAX_BRANCHLESS_PARTITION_SIZE {
         // Specialize for types that are relatively cheap to copy, where branchless optimizations
         // have large leverage e.g. `u64` and `String`.
-        partition_lomuto_branchless_cyclic::<T, F>
+
+        #[cfg(not(feature = "optimize_for_size"))]
+        {
+            partition_lomuto_branchless_cyclic::<T, F>
+        }
+
+        #[cfg(feature = "optimize_for_size")]
+        {
+            partition_lomuto_branchless_simple::<T, F>
+        }
     } else {
         partition_hoare_branchy_cyclic::<T, F>
     }
@@ -224,6 +236,7 @@ where
     }
 }
 
+#[cfg(not(feature = "optimize_for_size"))]
 struct PartitionState<T> {
     // The current element that is being looked at, scans left to right through slice.
     right: *mut T,
@@ -234,6 +247,7 @@ struct PartitionState<T> {
     gap: GapGuardRaw<T>,
 }
 
+#[cfg(not(feature = "optimize_for_size"))]
 fn partition_lomuto_branchless_cyclic<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
 where
     F: FnMut(&T, &T) -> bool,
@@ -325,6 +339,27 @@ where
     }
 }
 
+#[cfg(feature = "optimize_for_size")]
+fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    pivot: &T,
+    is_less: &mut F,
+) -> usize {
+    let mut left = 0;
+
+    for right in 0..v.len() {
+        // SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that
+        // left <= right and that both are in-bounds.
+        unsafe {
+            let right_is_lt = is_less(v.get_unchecked(right), pivot);
+            v.swap_unchecked(left, right);
+            left += right_is_lt as usize;
+        }
+    }
+
+    left
+}
+
 struct GapGuard<T> {
     pos: *mut T,
     value: ManuallyDrop<T>,
@@ -342,11 +377,13 @@ impl<T> Drop for GapGuard<T> {
 
 /// Ideally this wouldn't be needed and we could just use the regular GapGuard.
 /// See comment in [`partition_lomuto_branchless_cyclic`].
+#[cfg(not(feature = "optimize_for_size"))]
 struct GapGuardRaw<T> {
     pos: *mut T,
     value: *mut T,
 }
 
+#[cfg(not(feature = "optimize_for_size"))]
 impl<T> Drop for GapGuardRaw<T> {
     fn drop(&mut self) {
         // SAFETY: `self` MUST be constructed in a way that makes copying the gap value into