about summary refs log tree commit diff
diff options
context:
space:
mode:
authorThe8472 <git@infinite-source.de>2021-07-06 01:17:20 +0200
committerThe8472 <git@infinite-source.de>2021-07-06 20:20:16 +0200
commit5e877109b400a6f251dbefbbec94f2464c6f1b55 (patch)
tree79e93339c7e3f8031abf7b4ad6e68af06ac72a3f
parent5dcfec332ce9e003f100b4cf9dec895e5634edc3 (diff)
downloadrust-5e877109b400a6f251dbefbbec94f2464c6f1b55.tar.gz
rust-5e877109b400a6f251dbefbbec94f2464c6f1b55.zip
optimize {Path,PathBuf,Components}::{cmp,partial_cmp} for shared prefixes
-rw-r--r--library/std/src/path.rs45
-rw-r--r--library/std/src/path/tests.rs51
2 files changed, 90 insertions, 6 deletions
diff --git a/library/std/src/path.rs b/library/std/src/path.rs
index e75d1d38f8f..7875c9c9967 100644
--- a/library/std/src/path.rs
+++ b/library/std/src/path.rs
@@ -962,7 +962,7 @@ impl cmp::Eq for Components<'_> {}
 impl<'a> cmp::PartialOrd for Components<'a> {
     #[inline]
     fn partial_cmp(&self, other: &Components<'a>) -> Option<cmp::Ordering> {
-        Iterator::partial_cmp(self.clone(), other.clone())
+        Some(compare_components(self.clone(), other.clone()))
     }
 }
 
@@ -970,8 +970,41 @@ impl<'a> cmp::PartialOrd for Components<'a> {
 impl cmp::Ord for Components<'_> {
     #[inline]
     fn cmp(&self, other: &Self) -> cmp::Ordering {
-        Iterator::cmp(self.clone(), other.clone())
+        compare_components(self.clone(), other.clone())
+    }
+}
+
+fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cmp::Ordering {
+    // Fast path for long shared prefixes
+    //
+    // - compare raw bytes to find first mismatch
+    // - backtrack to find separator before mismatch to avoid ambiguous parsings of '.' or '..' characters
+    // - if found update state to only do a component-wise comparison on the remainder,
+    //   otherwise do it on the full path
+    //
+    // The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into
+    // the middle of one
+    if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front {
+        // this might benefit from a [u8]::first_mismatch simd implementation, if it existed
+        let first_difference =
+            match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) {
+                None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
+                None => left.path.len().min(right.path.len()),
+                Some(diff) => diff,
+            };
+
+        if let Some(previous_sep) =
+            left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b))
+        {
+            let mismatched_component_start = previous_sep + 1;
+            left.path = &left.path[mismatched_component_start..];
+            left.front = State::Body;
+            right.path = &right.path[mismatched_component_start..];
+            right.front = State::Body;
+        }
     }
+
+    Iterator::cmp(left, right)
 }
 
 /// An iterator over [`Path`] and its ancestors.
@@ -1704,7 +1737,7 @@ impl cmp::Eq for PathBuf {}
 impl cmp::PartialOrd for PathBuf {
     #[inline]
     fn partial_cmp(&self, other: &PathBuf) -> Option<cmp::Ordering> {
-        self.components().partial_cmp(other.components())
+        Some(compare_components(self.components(), other.components()))
     }
 }
 
@@ -1712,7 +1745,7 @@ impl cmp::PartialOrd for PathBuf {
 impl cmp::Ord for PathBuf {
     #[inline]
     fn cmp(&self, other: &PathBuf) -> cmp::Ordering {
-        self.components().cmp(other.components())
+        compare_components(self.components(), other.components())
     }
 }
 
@@ -2706,7 +2739,7 @@ impl cmp::Eq for Path {}
 impl cmp::PartialOrd for Path {
     #[inline]
     fn partial_cmp(&self, other: &Path) -> Option<cmp::Ordering> {
-        self.components().partial_cmp(other.components())
+        Some(compare_components(self.components(), other.components()))
     }
 }
 
@@ -2714,7 +2747,7 @@ impl cmp::PartialOrd for Path {
 impl cmp::Ord for Path {
     #[inline]
     fn cmp(&self, other: &Path) -> cmp::Ordering {
-        self.components().cmp(other.components())
+        compare_components(self.components(), other.components())
     }
 }
 
diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs
index 896d6c2a64c..6b7df78d3d7 100644
--- a/library/std/src/path/tests.rs
+++ b/library/std/src/path/tests.rs
@@ -1,7 +1,9 @@
 use super::*;
 
+use crate::collections::BTreeSet;
 use crate::rc::Rc;
 use crate::sync::Arc;
+use core::hint::black_box;
 
 macro_rules! t(
     ($path:expr, iter: $iter:expr) => (
@@ -1392,3 +1394,52 @@ fn into_rc() {
     assert_eq!(&*rc2, path);
     assert_eq!(&*arc2, path);
 }
+
+#[bench]
+fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) {
+    let prefix = "my/home";
+    let mut paths: Vec<_> =
+        (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
+
+    paths.sort();
+
+    b.iter(|| {
+        black_box(paths.as_mut_slice()).sort_unstable();
+    });
+}
+
+#[bench]
+fn bench_path_cmp_fast_path_long(b: &mut test::Bencher) {
+    let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
+    let paths: Vec<_> =
+        (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
+
+    let mut set = BTreeSet::new();
+
+    paths.iter().for_each(|p| {
+        set.insert(p.as_path());
+    });
+
+    b.iter(|| {
+        set.remove(paths[500].as_path());
+        set.insert(paths[500].as_path());
+    });
+}
+
+#[bench]
+fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
+    let prefix = "my/home";
+    let paths: Vec<_> =
+        (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
+
+    let mut set = BTreeSet::new();
+
+    paths.iter().for_each(|p| {
+        set.insert(p.as_path());
+    });
+
+    b.iter(|| {
+        set.remove(paths[500].as_path());
+        set.insert(paths[500].as_path());
+    });
+}