about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMatthias Krüger <matthias.krueger@famsik.de>2024-07-07 14:22:00 +0200
committerGitHub <noreply@github.com>2024-07-07 14:22:00 +0200
commit56557c455567998cb808be5ea31218a12150ca6c (patch)
tree7deb33d2e5dc22fbaa4de3e9b968642bb76871fd
parentd84d2214cd5506eb5552528fbe5e6b72ebe6527c (diff)
parentf21683432b72694b4558d779d6b5e5d49102ad7e (diff)
downloadrust-56557c455567998cb808be5ea31218a12150ca6c.tar.gz
rust-56557c455567998cb808be5ea31218a12150ca6c.zip
Rollup merge of #127297 - the8472:path-new-hash, r=Nilstrieb
Improve std::Path's Hash quality by avoiding prefix collisions

This adds a bit rotation to the already existing state so that the same sequence of characters chunked at different offsets into separate path components results in different hashes.

The tests are from #127255

Closes #127254
-rw-r--r--library/std/src/path.rs13
-rw-r--r--library/std/src/path/tests.rs35
2 files changed, 44 insertions, 4 deletions
diff --git a/library/std/src/path.rs b/library/std/src/path.rs
index 8d565e26a16..d5121a554bf 100644
--- a/library/std/src/path.rs
+++ b/library/std/src/path.rs
@@ -3192,15 +3192,19 @@ impl Hash for Path {
         let bytes = &bytes[prefix_len..];
 
         let mut component_start = 0;
-        let mut bytes_hashed = 0;
+        // track some extra state to avoid prefix collisions.
+        // ["foo", "bar"] and ["foobar"], will have the same payload bytes
+        // but result in different chunk_bits
+        let mut chunk_bits: usize = 0;
 
         for i in 0..bytes.len() {
             let is_sep = if verbatim { is_verbatim_sep(bytes[i]) } else { is_sep_byte(bytes[i]) };
             if is_sep {
                 if i > component_start {
                     let to_hash = &bytes[component_start..i];
+                    chunk_bits = chunk_bits.wrapping_add(to_hash.len());
+                    chunk_bits = chunk_bits.rotate_right(2);
                     h.write(to_hash);
-                    bytes_hashed += to_hash.len();
                 }
 
                 // skip over separator and optionally a following CurDir item
@@ -3221,11 +3225,12 @@ impl Hash for Path {
 
         if component_start < bytes.len() {
             let to_hash = &bytes[component_start..];
+            chunk_bits = chunk_bits.wrapping_add(to_hash.len());
+            chunk_bits = chunk_bits.rotate_right(2);
             h.write(to_hash);
-            bytes_hashed += to_hash.len();
         }
 
-        h.write_usize(bytes_hashed);
+        h.write_usize(chunk_bits);
     }
 }
 
diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs
index bb6126e4941..3ade4fb892f 100644
--- a/library/std/src/path/tests.rs
+++ b/library/std/src/path/tests.rs
@@ -1619,6 +1619,20 @@ pub fn test_compare() {
     relative_from: Some("")
     );
 
+    tc!("foo//", "foo",
+    eq: true,
+    starts_with: true,
+    ends_with: true,
+    relative_from: Some("")
+    );
+
+    tc!("foo///", "foo",
+    eq: true,
+    starts_with: true,
+    ends_with: true,
+    relative_from: Some("")
+    );
+
     tc!("foo/.", "foo",
     eq: true,
     starts_with: true,
@@ -1633,6 +1647,20 @@ pub fn test_compare() {
     relative_from: Some("")
     );
 
+    tc!("foo/.//bar", "foo/bar",
+    eq: true,
+    starts_with: true,
+    ends_with: true,
+    relative_from: Some("")
+    );
+
+    tc!("foo//./bar", "foo/bar",
+    eq: true,
+    starts_with: true,
+    ends_with: true,
+    relative_from: Some("")
+    );
+
     tc!("foo/bar", "foo",
     eq: false,
     starts_with: true,
@@ -1640,6 +1668,13 @@ pub fn test_compare() {
     relative_from: Some("bar")
     );
 
+    tc!("foo/bar", "foobar",
+    eq: false,
+    starts_with: false,
+    ends_with: false,
+    relative_from: None
+    );
+
     tc!("foo/bar/baz", "foo/bar",
     eq: false,
     starts_with: true,