diff options
| author | bors <bors@rust-lang.org> | 2024-11-13 03:43:59 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2024-11-13 03:43:59 +0000 |
| commit | 44f233f2519ce5d633c87c38014d03d8a5f0e810 (patch) | |
| tree | d236a231230a749a275dd8c8aed908392eb2d14e | |
| parent | 242f20dc1e95ca8def0ff436d7c844811ae7ac25 (diff) | |
| parent | 3855cb8048d1d1c1bae810963a88c4c40228fe82 (diff) | |
| download | rust-44f233f2519ce5d633c87c38014d03d8a5f0e810.tar.gz rust-44f233f2519ce5d633c87c38014d03d8a5f0e810.zip | |
Auto merge of #132883 - LaihoE:vectorized_is_sorted, r=thomcc
vectorize slice::is_sorted Benchmarks using u32 slices: | len | New | Old | |--------|----------------------|----------------------| | 2 | 1.1997 ns | 889.23 ps | | 4 | 1.6479 ns | 1.5396 ns | | 8 | 2.5764 ns | 2.5633 ns | | 16 | 5.4750 ns | 4.7421 ns | | 32 | 11.344 ns | 8.4634 ns | | 64 | 12.105 ns | 18.104 ns | | 128 | 17.263 ns | 33.185 ns | | 256 | 29.465 ns | 60.928 ns | | 512 | 48.926 ns | 116.19 ns | | 1024 | 85.274 ns | 237.91 ns | | 2048 | 160.94 ns | 469.53 ns | | 4096 | 311.60 ns | 911.43 ns | | 8192 | 615.89 ns | 2.2316 µs | | 16384 | 1.2619 µs | 3.4871 µs | | 32768 | 2.5245 µs | 6.9947 µs | | 65536 | 5.2254 µs | 15.212 µs | Seems to be a bit slower on small N but much faster on large N. Godbolt: https://rust.godbolt.org/z/Txn5MdfKn
| -rw-r--r-- | library/core/src/slice/mod.rs | 18 |
1 files changed, 17 insertions, 1 deletions
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index ce9d04d01d7..ef52cc44126 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -4097,7 +4097,23 @@ impl<T> [T] { where T: PartialOrd, { - self.is_sorted_by(|a, b| a <= b) + // This odd number works the best. 32 + 1 extra due to overlapping chunk boundaries. + const CHUNK_SIZE: usize = 33; + if self.len() < CHUNK_SIZE { + return self.windows(2).all(|w| w[0] <= w[1]); + } + let mut i = 0; + // Check in chunks for autovectorization. + while i < self.len() - CHUNK_SIZE { + let chunk = &self[i..i + CHUNK_SIZE]; + if !chunk.windows(2).fold(true, |acc, w| acc & (w[0] <= w[1])) { + return false; + } + // We need to ensure that chunk boundaries are also sorted. + // Overlap the next chunk with the last element of our last chunk. + i += CHUNK_SIZE - 1; + } + self[i..].windows(2).all(|w| w[0] <= w[1]) } /// Checks if the elements of this slice are sorted using the given comparator function. |
