about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2024-11-13 03:43:59 +0000
committerbors <bors@rust-lang.org>2024-11-13 03:43:59 +0000
commit44f233f2519ce5d633c87c38014d03d8a5f0e810 (patch)
treed236a231230a749a275dd8c8aed908392eb2d14e
parent242f20dc1e95ca8def0ff436d7c844811ae7ac25 (diff)
parent3855cb8048d1d1c1bae810963a88c4c40228fe82 (diff)
downloadrust-44f233f2519ce5d633c87c38014d03d8a5f0e810.tar.gz
rust-44f233f2519ce5d633c87c38014d03d8a5f0e810.zip
Auto merge of #132883 - LaihoE:vectorized_is_sorted, r=thomcc
vectorize slice::is_sorted

Benchmarks using u32 slices:
| len | New | Old |
|--------|----------------------|----------------------|
| 2      | 1.1997 ns           | 889.23 ps           |
| 4      | 1.6479 ns           | 1.5396 ns           |
| 8      | 2.5764 ns           | 2.5633 ns           |
| 16     | 5.4750 ns           | 4.7421 ns           |
| 32     | 11.344 ns           | 8.4634 ns           |
| 64     | 12.105 ns           | 18.104 ns           |
| 128    | 17.263 ns           | 33.185 ns           |
| 256    | 29.465 ns           | 60.928 ns           |
| 512    | 48.926 ns           | 116.19 ns           |
| 1024   | 85.274 ns           | 237.91 ns           |
| 2048   | 160.94 ns           | 469.53 ns           |
| 4096   | 311.60 ns           | 911.43 ns           |
| 8192   | 615.89 ns           | 2.2316 µs           |
| 16384  | 1.2619 µs           | 3.4871 µs           |
| 32768  | 2.5245 µs           | 6.9947 µs           |
| 65536  | 5.2254 µs           | 15.212 µs           |

Seems to be a bit slower on small N but much faster on large N.

Godbolt: https://rust.godbolt.org/z/Txn5MdfKn
-rw-r--r--library/core/src/slice/mod.rs18
1 files changed, 17 insertions, 1 deletions
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index ce9d04d01d7..ef52cc44126 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -4097,7 +4097,23 @@ impl<T> [T] {
     where
         T: PartialOrd,
     {
-        self.is_sorted_by(|a, b| a <= b)
+        // This odd number works the best. 32 + 1 extra due to overlapping chunk boundaries.
+        const CHUNK_SIZE: usize = 33;
+        if self.len() < CHUNK_SIZE {
+            return self.windows(2).all(|w| w[0] <= w[1]);
+        }
+        let mut i = 0;
+        // Check in chunks for autovectorization.
+        while i < self.len() - CHUNK_SIZE {
+            let chunk = &self[i..i + CHUNK_SIZE];
+            if !chunk.windows(2).fold(true, |acc, w| acc & (w[0] <= w[1])) {
+                return false;
+            }
+            // We need to ensure that chunk boundaries are also sorted.
+            // Overlap the next chunk with the last element of our last chunk.
+            i += CHUNK_SIZE - 1;
+        }
+        self[i..].windows(2).all(|w| w[0] <= w[1])
     }
 
     /// Checks if the elements of this slice are sorted using the given comparator function.