about summary refs log tree commit diff
path: root/library/core/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2023-11-24 07:23:04 +0000
committerbors <bors@rust-lang.org>2023-11-24 07:23:04 +0000
commit8abf920985368264ed4d46e62e1730232e161292 (patch)
tree9208f86204d79052087161df11c31f70f08e8abc /library/core/src
parent41fe75ec6b824d51e5365098c4af9de45e5a2723 (diff)
parentd585eecb053b79ad2ef7034b8283bb684a3dfe5b (diff)
downloadrust-8abf920985368264ed4d46e62e1730232e161292.tar.gz
rust-8abf920985368264ed4d46e62e1730232e161292.zip
Auto merge of #117722 - okaneco:binarysearch, r=thomcc
Refactor `binary_search_by` to use conditional moves

Refactor the if/else checking on `cmp::Ordering` variants to a "branchless" reassignment of left and right.

This change results in fewer branches and instructions.
https://rust.godbolt.org/z/698eYffTx

---

I saw consistent benchmark improvements locally. Performance of worst case seems about the same, maybe slightly faster for the L3 test.

Current
```
slice::binary_search_l1             43.00ns/iter +/- 3.00ns
slice::binary_search_l1_with_dups   25.00ns/iter +/- 0.00ns
slice::binary_search_l1_worst_case  10.00ns/iter +/- 0.00ns
slice::binary_search_l2             64.00ns/iter +/- 1.00ns
slice::binary_search_l2_with_dups   42.00ns/iter +/- 0.00ns
slice::binary_search_l2_worst_case  16.00ns/iter +/- 0.00ns
slice::binary_search_l3            132.00ns/iter +/- 2.00ns
slice::binary_search_l3_with_dups  108.00ns/iter +/- 2.00ns
slice::binary_search_l3_worst_case  33.00ns/iter +/- 3.00ns
```
This PR
```
slice::binary_search_l1            21.00ns/iter +/- 0.00ns
slice::binary_search_l1_with_dups  14.00ns/iter +/- 0.00ns
slice::binary_search_l1_worst_case  9.00ns/iter +/- 0.00ns
slice::binary_search_l2            34.00ns/iter +/- 0.00ns
slice::binary_search_l2_with_dups  23.00ns/iter +/- 0.00ns
slice::binary_search_l2_worst_case 16.00ns/iter +/- 0.00ns
slice::binary_search_l3            92.00ns/iter +/- 3.00ns
slice::binary_search_l3_with_dups  63.00ns/iter +/- 1.00ns
slice::binary_search_l3_worst_case 29.00ns/iter +/- 0.00ns
```
Diffstat (limited to 'library/core/src')
-rw-r--r--library/core/src/slice/mod.rs17
1 files changed, 8 insertions, 9 deletions
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index 6cf5d48a167..998ece3afa9 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -6,7 +6,7 @@
 
 #![stable(feature = "rust1", since = "1.0.0")]
 
-use crate::cmp::Ordering::{self, Greater, Less};
+use crate::cmp::Ordering::{self, Equal, Greater, Less};
 use crate::fmt;
 use crate::intrinsics::{assert_unsafe_precondition, exact_div};
 use crate::marker::Copy;
@@ -2854,14 +2854,13 @@ impl<T> [T] {
             // we have `left + size/2 < self.len()`, and this is in-bounds.
             let cmp = f(unsafe { self.get_unchecked(mid) });
 
-            // The reason why we use if/else control flow rather than match
-            // is because match reorders comparison operations, which is perf sensitive.
-            // This is x86 asm for u8: https://rust.godbolt.org/z/8Y8Pra.
-            if cmp == Less {
-                left = mid + 1;
-            } else if cmp == Greater {
-                right = mid;
-            } else {
+            // This control flow produces conditional moves, which results in
+            // fewer branches and instructions than if/else or matching on
+            // cmp::Ordering.
+            // This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx.
+            left = if cmp == Less { mid + 1 } else { left };
+            right = if cmp == Greater { mid } else { right };
+            if cmp == Equal {
                 // SAFETY: same as the `get_unchecked` above
                 unsafe { crate::intrinsics::assume(mid < self.len()) };
                 return Ok(mid);