about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
author: bors <bors@rust-lang.org> 2013-08-18 12:41:59 -0700
committer: bors <bors@rust-lang.org> 2013-08-18 12:41:59 -0700
commit: e185b049af214cc7900beaae557a12863f4b3ce3 (patch)
tree: 4347d5393e9a4df2b053b03007397973a6760939 /src/libstd
parent: 88bd2155d780d2d7d976ff271b6bb25a9b03e119 (diff)
parent: bfa1331cd7fb4a8a74ede969abe22004d0232184 (diff)
download: rust-e185b049af214cc7900beaae557a12863f4b3ce3.tar.gz
          rust-e185b049af214cc7900beaae557a12863f4b3ce3.zip
auto merge of #8545 : blake2-ppc/rust/iterbytes, r=alexcrichton
Address issue #5257. For example, these values all had the same hash value:

	("aaa", "bbb", "ccc")
	("aaab", "bb", "ccc")
	("aaabbb", "", "ccc")

IterBytes for &[A] now includes the length, before calling iter_bytes on
each element.

IterBytes for &str is now terminated by a byte that does not appear in
UTF-8. This way only one more byte is processed when hashing strings.
Diffstat (limited to 'src/libstd')
-rw-r--r-- src/libstd/hash.rs 24
-rw-r--r-- src/libstd/str/ascii.rs 2
-rw-r--r-- src/libstd/to_bytes.rs 124
3 files changed, 89 insertions, 61 deletions
diff --git a/src/libstd/hash.rs b/src/libstd/hash.rs
index c9d031ed1b1..f3df42f7a43 100644
--- a/src/libstd/hash.rs
+++ b/src/libstd/hash.rs
@@ -409,6 +409,14 @@ mod tests {
 
     use uint;
 
+    // Hash just the bytes of the slice, without length prefix
+    struct Bytes<'self>(&'self [u8]);
+    impl<'self> IterBytes for Bytes<'self> {
+        fn iter_bytes(&self, _lsb0: bool, f: &fn(&[u8]) -> bool) -> bool {
+            f(**self)
+        }
+    }
+
     #[test]
     fn test_siphash() {
         let vecs : [[u8, ..8], ..64] = [
@@ -496,7 +504,7 @@ mod tests {
         while t < 64 {
             debug!("siphash test %?", t);
             let vec = u8to64_le!(vecs[t], 0);
-            let out = buf.hash_keyed(k0, k1);
+            let out = Bytes(buf.as_slice()).hash_keyed(k0, k1);
             debug!("got %?, expected %?", out, vec);
             assert_eq!(vec, out);
 
@@ -587,4 +595,18 @@ mod tests {
     fn test_float_hashes_of_zero() {
         assert_eq!(0.0.hash(), (-0.0).hash());
     }
+
+    #[test]
+    fn test_hash_no_concat_alias() {
+        let s = ("aa", "bb");
+        let t = ("aabb", "");
+        let u = ("a", "abb");
+
+        let v = (&[1u8], &[0u8, 0], &[0u8]);
+        let w = (&[1u8, 0, 0, 0], &[], &[]);
+
+        assert!(v != w);
+        assert!(s.hash() != t.hash() && s.hash() != u.hash());
+        assert!(v.hash() != w.hash());
+    }
 }
diff --git a/src/libstd/str/ascii.rs b/src/libstd/str/ascii.rs
index e0068f5e53e..1cfbf841537 100644
--- a/src/libstd/str/ascii.rs
+++ b/src/libstd/str/ascii.rs
@@ -376,7 +376,6 @@ static ASCII_UPPER_MAP: &'static [u8] = &[
 #[cfg(test)]
 mod tests {
     use super::*;
-    use to_bytes::ToBytes;
     use str::from_char;
 
     macro_rules! v2ascii (
@@ -445,7 +444,6 @@ mod tests {
 
     #[test]
     fn test_ascii_to_bytes() {
-        assert_eq!(v2ascii!(~[40, 32, 59]).to_bytes(false), ~[40u8, 32u8, 59u8]);
         assert_eq!(v2ascii!(~[40, 32, 59]).into_bytes(), ~[40u8, 32u8, 59u8]);
     }
 
diff --git a/src/libstd/to_bytes.rs b/src/libstd/to_bytes.rs
index 4d84b6d251d..198c09964bb 100644
--- a/src/libstd/to_bytes.rs
+++ b/src/libstd/to_bytes.rs
@@ -15,37 +15,43 @@ The `ToBytes` and `IterBytes` traits
 */
 
 use cast;
+use container::Container;
 use io;
 use io::Writer;
 use iterator::Iterator;
 use option::{None, Option, Some};
-use str::StrSlice;
-use vec::ImmutableVector;
+use str::{Str, StrSlice};
+use vec::{Vector, ImmutableVector};
 
 pub type Cb<'self> = &'self fn(buf: &[u8]) -> bool;
 
-/**
- * A trait to implement in order to make a type hashable;
- * This works in combination with the trait `Hash::Hash`, and
- * may in the future be merged with that trait or otherwise
- * modified when default methods and trait inheritance are
- * completed.
- */
+///
+/// A trait to implement in order to make a type hashable;
+/// This works in combination with the trait `std::hash::Hash`, and
+/// may in the future be merged with that trait or otherwise
+/// modified when default methods and trait inheritance are
+/// completed.
+///
+/// IterBytes should be implemented so that the extent of the
+/// produced byte stream can be discovered, given the original
+/// type.
+/// For example, the IterBytes implementation for vectors emits
+/// its length first, and enums should emit their discriminant.
+///
 pub trait IterBytes {
-    /**
-     * Call the provided callback `f` one or more times with
-     * byte-slices that should be used when computing a hash
-     * value or otherwise "flattening" the structure into
-     * a sequence of bytes. The `lsb0` parameter conveys
-     * whether the caller is asking for little-endian bytes
-     * (`true`) or big-endian (`false`); this should only be
-     * relevant in implementations that represent a single
-     * multi-byte datum such as a 32 bit integer or 64 bit
-     * floating-point value. It can be safely ignored for
-     * larger structured types as they are usually processed
-     * left-to-right in declaration order, regardless of
-     * underlying memory endianness.
-     */
+    /// Call the provided callback `f` one or more times with
+    /// byte-slices that should be used when computing a hash
+    /// value or otherwise "flattening" the structure into
+    /// a sequence of bytes. The `lsb0` parameter conveys
+    /// whether the caller is asking for little-endian bytes
+    /// (`true`) or big-endian (`false`); this should only be
+    /// relevant in implementations that represent a single
+    /// multi-byte datum such as a 32 bit integer or 64 bit
+    /// floating-point value. It can be safely ignored for
+    /// larger structured types as they are usually processed
+    /// left-to-right in declaration order, regardless of
+    /// underlying memory endianness.
+    ///
     fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool;
 }
 
@@ -224,74 +230,76 @@ impl IterBytes for f64 {
 impl<'self,A:IterBytes> IterBytes for &'self [A] {
     #[inline]
     fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
+        self.len().iter_bytes(lsb0, |b| f(b)) &&
         self.iter().advance(|elt| elt.iter_bytes(lsb0, |b| f(b)))
     }
 }
 
-impl<A:IterBytes,B:IterBytes> IterBytes for (A,B) {
-  #[inline]
-  fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
-    match *self {
-      (ref a, ref b) => { a.iter_bytes(lsb0, |b| f(b)) &&
-                          b.iter_bytes(lsb0, |b| f(b)) }
-    }
-  }
-}
-
-impl<A:IterBytes,B:IterBytes,C:IterBytes> IterBytes for (A,B,C) {
-  #[inline]
-  fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
-    match *self {
-      (ref a, ref b, ref c) => {
-        a.iter_bytes(lsb0, |b| f(b)) &&
-        b.iter_bytes(lsb0, |b| f(b)) &&
-        c.iter_bytes(lsb0, |b| f(b))
-      }
+impl<A: IterBytes> IterBytes for (A, ) {
+    fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
+        match *self {
+            (ref a, ) => a.iter_bytes(lsb0, |b| f(b))
+        }
     }
-  }
 }
 
-// Move this to vec, probably.
-fn borrow<'x,A>(a: &'x [A]) -> &'x [A] {
-    a
-}
+macro_rules! iter_bytes_tuple(
+    ($($A:ident),+) => (
+        impl<$($A: IterBytes),+> IterBytes for ($($A),+) {
+            fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
+                match *self {
+                    ($(ref $A),+) => {
+                        $(
+                            $A .iter_bytes(lsb0, |b| f(b))
+                        )&&+
+                    }
+                }
+            }
+        }
+    )
+)
+
+iter_bytes_tuple!(A, B)
+iter_bytes_tuple!(A, B, C)
+iter_bytes_tuple!(A, B, C, D)
+iter_bytes_tuple!(A, B, C, D, E)
+iter_bytes_tuple!(A, B, C, D, E, F)
+iter_bytes_tuple!(A, B, C, D, E, F, G)
+iter_bytes_tuple!(A, B, C, D, E, F, G, H)
 
 impl<A:IterBytes> IterBytes for ~[A] {
     #[inline]
     fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
-        borrow(*self).iter_bytes(lsb0, f)
+        self.as_slice().iter_bytes(lsb0, f)
     }
 }
 
 impl<A:IterBytes> IterBytes for @[A] {
     #[inline]
     fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
-        borrow(*self).iter_bytes(lsb0, f)
+        self.as_slice().iter_bytes(lsb0, f)
     }
 }
 
 impl<'self> IterBytes for &'self str {
     #[inline]
     fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
-        f(self.as_bytes())
+        // Terminate the string with a byte that does not appear in UTF-8
+        f(self.as_bytes()) && f([0xFF])
     }
 }
 
 impl IterBytes for ~str {
     #[inline]
-    fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
-        // this should possibly include the null terminator, but that
-        // breaks .find_equiv on hashmaps.
-        f(self.as_bytes())
+    fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
+        self.as_slice().iter_bytes(lsb0, f)
     }
 }
 
 impl IterBytes for @str {
     #[inline]
-    fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
-        // this should possibly include the null terminator, but that
-        // breaks .find_equiv on hashmaps.
-        f(self.as_bytes())
+    fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
+        self.as_slice().iter_bytes(lsb0, f)
     }
 }