about summary refs log tree commit diff
path: root/library
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-08-20 05:00:45 +0000
committerbors <bors@rust-lang.org>2021-08-20 05:00:45 +0000
commitbcfd3f7e88084850f87b8e34b4dcb9fceb872d00 (patch)
treedb9a3bde6281b361d983e66c722deea60b92d7fd /library
parent7611fe438dae91084d17022e705bf64374d5ba4b (diff)
parentdfdf3610184b949891b6f8c1b597d07c77293325 (diff)
downloadrust-bcfd3f7e88084850f87b8e34b4dcb9fceb872d00.tar.gz
rust-bcfd3f7e88084850f87b8e34b4dcb9fceb872d00.zip
Auto merge of #86898 - the8472:path-cmp, r=dtolnay
Add fast path for Path::cmp that skips over long shared prefixes

```
# before
test path::tests::bench_path_cmp_fast_path_buf_sort               ... bench:      60,811 ns/iter (+/- 865)
test path::tests::bench_path_cmp_fast_path_long                   ... bench:       6,459 ns/iter (+/- 275)
test path::tests::bench_path_cmp_fast_path_short                  ... bench:       1,777 ns/iter (+/- 34)

# after
test path::tests::bench_path_cmp_fast_path_buf_sort               ... bench:      38,140 ns/iter (+/- 211)
test path::tests::bench_path_cmp_fast_path_long                   ... bench:       1,471 ns/iter (+/- 24)
test path::tests::bench_path_cmp_fast_path_short                  ... bench:       1,106 ns/iter (+/- 9)
```
Diffstat (limited to 'library')
-rw-r--r--library/std/src/path.rs47
-rw-r--r--library/std/src/path/tests.rs68
2 files changed, 108 insertions, 7 deletions
diff --git a/library/std/src/path.rs b/library/std/src/path.rs
index 69419145b1b..654049ba458 100644
--- a/library/std/src/path.rs
+++ b/library/std/src/path.rs
@@ -962,7 +962,7 @@ impl cmp::Eq for Components<'_> {}
 impl<'a> cmp::PartialOrd for Components<'a> {
     #[inline]
     fn partial_cmp(&self, other: &Components<'a>) -> Option<cmp::Ordering> {
-        Iterator::partial_cmp(self.clone(), other.clone())
+        Some(compare_components(self.clone(), other.clone()))
     }
 }
 
@@ -970,8 +970,41 @@ impl<'a> cmp::PartialOrd for Components<'a> {
 impl cmp::Ord for Components<'_> {
     #[inline]
     fn cmp(&self, other: &Self) -> cmp::Ordering {
-        Iterator::cmp(self.clone(), other.clone())
+        compare_components(self.clone(), other.clone())
+    }
+}
+
+fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cmp::Ordering {
+    // Fast path for long shared prefixes
+    //
+    // - compare raw bytes to find first mismatch
+    // - backtrack to find separator before mismatch to avoid ambiguous parsings of '.' or '..' characters
+    // - if found update state to only do a component-wise comparison on the remainder,
+    //   otherwise do it on the full path
+    //
+    // The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into
+    // the middle of one
+    if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front {
+        // this might benefit from a [u8]::first_mismatch simd implementation, if it existed
+        let first_difference =
+            match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) {
+                None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
+                None => left.path.len().min(right.path.len()),
+                Some(diff) => diff,
+            };
+
+        if let Some(previous_sep) =
+            left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b))
+        {
+            let mismatched_component_start = previous_sep + 1;
+            left.path = &left.path[mismatched_component_start..];
+            left.front = State::Body;
+            right.path = &right.path[mismatched_component_start..];
+            right.front = State::Body;
+        }
     }
+
+    Iterator::cmp(left, right)
 }
 
 /// An iterator over [`Path`] and its ancestors.
@@ -1704,7 +1737,7 @@ impl cmp::Eq for PathBuf {}
 impl cmp::PartialOrd for PathBuf {
     #[inline]
     fn partial_cmp(&self, other: &PathBuf) -> Option<cmp::Ordering> {
-        self.components().partial_cmp(other.components())
+        Some(compare_components(self.components(), other.components()))
     }
 }
 
@@ -1712,7 +1745,7 @@ impl cmp::PartialOrd for PathBuf {
 impl cmp::Ord for PathBuf {
     #[inline]
     fn cmp(&self, other: &PathBuf) -> cmp::Ordering {
-        self.components().cmp(other.components())
+        compare_components(self.components(), other.components())
     }
 }
 
@@ -2686,7 +2719,7 @@ impl fmt::Display for Display<'_> {
 impl cmp::PartialEq for Path {
     #[inline]
     fn eq(&self, other: &Path) -> bool {
-        self.components().eq(other.components())
+        self.components() == other.components()
     }
 }
 
@@ -2706,7 +2739,7 @@ impl cmp::Eq for Path {}
 impl cmp::PartialOrd for Path {
     #[inline]
     fn partial_cmp(&self, other: &Path) -> Option<cmp::Ordering> {
-        self.components().partial_cmp(other.components())
+        Some(compare_components(self.components(), other.components()))
     }
 }
 
@@ -2714,7 +2747,7 @@ impl cmp::PartialOrd for Path {
 impl cmp::Ord for Path {
     #[inline]
     fn cmp(&self, other: &Path) -> cmp::Ordering {
-        self.components().cmp(other.components())
+        compare_components(self.components(), other.components())
     }
 }
 
diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs
index 896d6c2a64c..c10ded08f2c 100644
--- a/library/std/src/path/tests.rs
+++ b/library/std/src/path/tests.rs
@@ -1,7 +1,9 @@
 use super::*;
 
+use crate::collections::BTreeSet;
 use crate::rc::Rc;
 use crate::sync::Arc;
+use core::hint::black_box;
 
 macro_rules! t(
     ($path:expr, iter: $iter:expr) => (
@@ -1392,3 +1394,69 @@ fn into_rc() {
     assert_eq!(&*rc2, path);
     assert_eq!(&*arc2, path);
 }
+
+#[test]
+fn test_ord() {
+    macro_rules! ord(
+        ($ord:ident, $left:expr, $right:expr) => ( {
+            assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord);
+        });
+    );
+
+    ord!(Less, "1", "2");
+    ord!(Less, "/foo/bar", "/foo./bar");
+    ord!(Less, "foo/bar", "foo/bar.");
+    ord!(Equal, "foo/./bar", "foo/bar/");
+    ord!(Equal, "foo/bar", "foo/bar/");
+    ord!(Equal, "foo/bar", "foo/bar/.");
+    ord!(Equal, "foo/bar", "foo/bar//");
+}
+
+#[bench]
+fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) {
+    let prefix = "my/home";
+    let mut paths: Vec<_> =
+        (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
+
+    paths.sort();
+
+    b.iter(|| {
+        black_box(paths.as_mut_slice()).sort_unstable();
+    });
+}
+
+#[bench]
+fn bench_path_cmp_fast_path_long(b: &mut test::Bencher) {
+    let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
+    let paths: Vec<_> =
+        (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
+
+    let mut set = BTreeSet::new();
+
+    paths.iter().for_each(|p| {
+        set.insert(p.as_path());
+    });
+
+    b.iter(|| {
+        set.remove(paths[500].as_path());
+        set.insert(paths[500].as_path());
+    });
+}
+
+#[bench]
+fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
+    let prefix = "my/home";
+    let paths: Vec<_> =
+        (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
+
+    let mut set = BTreeSet::new();
+
+    paths.iter().for_each(|p| {
+        set.insert(p.as_path());
+    });
+
+    b.iter(|| {
+        set.remove(paths[500].as_path());
+        set.insert(paths[500].as_path());
+    });
+}