about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-11-14 15:18:26 +0000
committerbors <bors@rust-lang.org>2021-11-14 15:18:26 +0000
commitc8e94975a6541e947a1bd4971e084c8ba637f2b6 (patch)
tree66f5ba4c28a327483a36b5bfc1e775bb07fb72c7
parent3b2c45441d7eefed63f6658ff8becd5a51eaeae1 (diff)
parentc1ea7bdc87a07c733769fd6adaa16818d692df24 (diff)
downloadrust-c8e94975a6541e947a1bd4971e084c8ba637f2b6.tar.gz
rust-c8e94975a6541e947a1bd4971e084c8ba637f2b6.zip
Auto merge of #90596 - the8472:path-hash-opt, r=Mark-Simulacrum
Optimize Eq and Hash for Path/PathBuf

```
# new

test path::tests::bench_hash_path_long                            ... bench:          86 ns/iter (+/- 1)
test path::tests::bench_hash_path_short                           ... bench:          13 ns/iter (+/- 1)
test path::tests::bench_path_hashset                              ... bench:         197 ns/iter (+/- 6)
test path::tests::bench_path_hashset_miss                         ... bench:          94 ns/iter (+/- 4)

# old

test path::tests::bench_hash_path_long                            ... bench:         192 ns/iter (+/- 2)
test path::tests::bench_hash_path_short                           ... bench:          33 ns/iter (+/- 1)
test path::tests::bench_path_hashset                              ... bench:       1,121 ns/iter (+/- 24)
test path::tests::bench_path_hashset_miss                         ... bench:         273 ns/iter (+/- 6)
```
-rw-r--r--library/std/src/path.rs70
-rw-r--r--library/std/src/path/tests.rs80
-rw-r--r--library/std/src/sys/unix/path.rs1
3 files changed, 140 insertions, 11 deletions
diff --git a/library/std/src/path.rs b/library/std/src/path.rs
index dc0c735a06c..cf2cd5adc48 100644
--- a/library/std/src/path.rs
+++ b/library/std/src/path.rs
@@ -979,6 +979,25 @@ impl FusedIterator for Components<'_> {}
 impl<'a> cmp::PartialEq for Components<'a> {
     #[inline]
     fn eq(&self, other: &Components<'a>) -> bool {
+        let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self;
+
+        // Fast path for exact matches, e.g. for hashmap lookups.
+        // Don't explicitly compare the prefix or has_physical_root fields since they'll
+        // either be covered by the `path` buffer or are only relevant for `prefix_verbatim()`.
+        if self.path.len() == other.path.len()
+            && self.front == other.front
+            && self.back == State::Body
+            && other.back == State::Body
+            && self.prefix_verbatim() == other.prefix_verbatim()
+        {
+            // possible future improvement: this could bail out earlier if there were a
+            // reverse memcmp/bcmp comparing back to front
+            if self.path == other.path {
+                return true;
+            }
+        }
+
+        // compare back to front since absolute paths often share long prefixes
         Iterator::eq(self.clone().rev(), other.clone().rev())
     }
 }
@@ -1013,13 +1032,12 @@ fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cm
     // The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into
     // the middle of one
     if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front {
-        // this might benefit from a [u8]::first_mismatch simd implementation, if it existed
-        let first_difference =
-            match left.path.iter().zip(right.path.iter()).position(|(&a, &b)| a != b) {
-                None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
-                None => left.path.len().min(right.path.len()),
-                Some(diff) => diff,
-            };
+        // possible future improvement: a [u8]::first_mismatch simd implementation
+        let first_difference = match left.path.iter().zip(right.path).position(|(&a, &b)| a != b) {
+            None if left.path.len() == right.path.len() => return cmp::Ordering::Equal,
+            None => left.path.len().min(right.path.len()),
+            Some(diff) => diff,
+        };
 
         if let Some(previous_sep) =
             left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b))
@@ -2873,9 +2891,43 @@ impl cmp::PartialEq for Path {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl Hash for Path {
     fn hash<H: Hasher>(&self, h: &mut H) {
-        for component in self.components() {
-            component.hash(h);
+        let bytes = self.as_u8_slice();
+        let prefix_len = match parse_prefix(&self.inner) {
+            Some(prefix) => {
+                prefix.hash(h);
+                prefix.len()
+            }
+            None => 0,
+        };
+        let bytes = &bytes[prefix_len..];
+
+        let mut component_start = 0;
+        let mut bytes_hashed = 0;
+
+        for i in 0..bytes.len() {
+            if is_sep_byte(bytes[i]) {
+                if i > component_start {
+                    let to_hash = &bytes[component_start..i];
+                    h.write(to_hash);
+                    bytes_hashed += to_hash.len();
+                }
+
+                // skip over separator and optionally a following CurDir item
+                // since components() would normalize these away
+                component_start = i + match bytes[i..] {
+                    [_, b'.', b'/', ..] | [_, b'.'] => 2,
+                    _ => 1,
+                };
+            }
+        }
+
+        if component_start < bytes.len() {
+            let to_hash = &bytes[component_start..];
+            h.write(to_hash);
+            bytes_hashed += to_hash.len();
         }
+
+        h.write_usize(bytes_hashed);
     }
 }
 
diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs
index 0a16ff2a721..2bf499e1ab8 100644
--- a/library/std/src/path/tests.rs
+++ b/library/std/src/path/tests.rs
@@ -1,6 +1,8 @@
 use super::*;
 
-use crate::collections::BTreeSet;
+use crate::collections::hash_map::DefaultHasher;
+use crate::collections::{BTreeSet, HashSet};
+use crate::hash::Hasher;
 use crate::rc::Rc;
 use crate::sync::Arc;
 use core::hint::black_box;
@@ -1632,7 +1634,25 @@ fn into_rc() {
 fn test_ord() {
     macro_rules! ord(
         ($ord:ident, $left:expr, $right:expr) => ( {
-            assert_eq!(Path::new($left).cmp(&Path::new($right)), core::cmp::Ordering::$ord);
+            use core::cmp::Ordering;
+
+            let left = Path::new($left);
+            let right = Path::new($right);
+            assert_eq!(left.cmp(&right), Ordering::$ord);
+            if (core::cmp::Ordering::$ord == Ordering::Equal) {
+                assert_eq!(left, right);
+
+                let mut hasher = DefaultHasher::new();
+                left.hash(&mut hasher);
+                let left_hash = hasher.finish();
+                hasher = DefaultHasher::new();
+                right.hash(&mut hasher);
+                let right_hash = hasher.finish();
+
+                assert_eq!(left_hash, right_hash, "hashes for {:?} and {:?} must match", left, right);
+            } else {
+                assert_ne!(left, right);
+            }
         });
     );
 
@@ -1693,3 +1713,59 @@ fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) {
         set.insert(paths[500].as_path());
     });
 }
+
+#[bench]
+fn bench_path_hashset(b: &mut test::Bencher) {
+    let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
+    let paths: Vec<_> =
+        (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
+
+    let mut set = HashSet::new();
+
+    paths.iter().for_each(|p| {
+        set.insert(p.as_path());
+    });
+
+    b.iter(|| {
+        set.remove(paths[500].as_path());
+        set.insert(black_box(paths[500].as_path()))
+    });
+}
+
+#[bench]
+fn bench_path_hashset_miss(b: &mut test::Bencher) {
+    let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/";
+    let paths: Vec<_> =
+        (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {}.rs", num))).collect();
+
+    let mut set = HashSet::new();
+
+    paths.iter().for_each(|p| {
+        set.insert(p.as_path());
+    });
+
+    let probe = PathBuf::from(prefix).join("other");
+
+    b.iter(|| set.remove(black_box(probe.as_path())));
+}
+
+#[bench]
+fn bench_hash_path_short(b: &mut test::Bencher) {
+    let mut hasher = DefaultHasher::new();
+    let path = Path::new("explorer.exe");
+
+    b.iter(|| black_box(path).hash(&mut hasher));
+
+    black_box(hasher.finish());
+}
+
+#[bench]
+fn bench_hash_path_long(b: &mut test::Bencher) {
+    let mut hasher = DefaultHasher::new();
+    let path =
+        Path::new("/aaaaa/aaaaaa/./../aaaaaaaa/bbbbbbbbbbbbb/ccccccccccc/ddddddddd/eeeeeee.fff");
+
+    b.iter(|| black_box(path).hash(&mut hasher));
+
+    black_box(hasher.finish());
+}
diff --git a/library/std/src/sys/unix/path.rs b/library/std/src/sys/unix/path.rs
index 840a7ae0426..717add9ec48 100644
--- a/library/std/src/sys/unix/path.rs
+++ b/library/std/src/sys/unix/path.rs
@@ -11,6 +11,7 @@ pub fn is_verbatim_sep(b: u8) -> bool {
     b == b'/'
 }
 
+#[inline]
 pub fn parse_prefix(_: &OsStr) -> Option<Prefix<'_>> {
     None
 }