diff options
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/collections/hashmap/bench.rs | 130 | ||||
| -rw-r--r-- | src/libstd/collections/hashmap/map.rs (renamed from src/libstd/collections/hashmap.rs) | 1673 | ||||
| -rw-r--r-- | src/libstd/collections/hashmap/mod.rs | 27 | ||||
| -rw-r--r-- | src/libstd/collections/hashmap/set.rs | 696 | ||||
| -rw-r--r-- | src/libstd/collections/hashmap/table.rs | 877 |
5 files changed, 1743 insertions, 1660 deletions
diff --git a/src/libstd/collections/hashmap/bench.rs b/src/libstd/collections/hashmap/bench.rs new file mode 100644 index 00000000000..66d97ba0448 --- /dev/null +++ b/src/libstd/collections/hashmap/bench.rs @@ -0,0 +1,130 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![cfg(test)] + +extern crate test; +use prelude::*; + +use self::test::Bencher; +use iter::{range_inclusive}; + +#[bench] +fn new_drop(b : &mut Bencher) { + use super::HashMap; + + b.iter(|| { + let m : HashMap<int, int> = HashMap::new(); + assert_eq!(m.len(), 0); + }) +} + +#[bench] +fn new_insert_drop(b : &mut Bencher) { + use super::HashMap; + + b.iter(|| { + let mut m = HashMap::new(); + m.insert(0i, 0i); + assert_eq!(m.len(), 1); + }) +} + +#[bench] +fn insert(b: &mut Bencher) { + use super::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + let mut k = 1001; + + b.iter(|| { + m.insert(k, k); + k += 1; + }); +} + +#[bench] +fn find_existing(b: &mut Bencher) { + use super::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + b.iter(|| { + for i in range_inclusive(1i, 1000) { + m.contains_key(&i); + } + }); +} + +#[bench] +fn find_nonexisting(b: &mut Bencher) { + use super::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + b.iter(|| { + for i in range_inclusive(1001i, 2000) { + m.contains_key(&i); + } + }); +} + +#[bench] +fn hashmap_as_queue(b: &mut Bencher) { + use super::HashMap; + + let mut m = 
HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + let mut k = 1i; + + b.iter(|| { + m.pop(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }); +} + +#[bench] +fn find_pop_insert(b: &mut Bencher) { + use super::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + let mut k = 1i; + + b.iter(|| { + m.find(&(k + 400)); + m.find(&(k + 2000)); + m.pop(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }) +} diff --git a/src/libstd/collections/hashmap.rs b/src/libstd/collections/hashmap/map.rs index bfe74fed077..7a3779a91a0 100644 --- a/src/libstd/collections/hashmap.rs +++ b/src/libstd/collections/hashmap/map.rs @@ -10,16 +10,15 @@ // // ignore-lexer-test FIXME #15883 -//! Unordered containers, implemented as hash-tables (`HashSet` and `HashMap` types) - use clone::Clone; use cmp::{max, Eq, Equiv, PartialEq}; -use collections::{Collection, Mutable, Set, MutableSet, Map, MutableMap}; +use collections::{Collection, Mutable, MutableSet, Map, MutableMap}; use default::Default; use fmt::Show; use fmt; -use hash::{Hash, Hasher, RandomSipHasher}; -use iter::{Iterator, FromIterator, FilterMap, Chain, Repeat, Zip, Extendable, range}; +use RandomSipHasher; +use hash::{Hash, Hasher}; +use iter::{Iterator, FromIterator, Extendable, range}; use iter; use mem::replace; use num; @@ -28,864 +27,11 @@ use option::{Some, None, Option}; use result::{Ok, Err}; use ops::Index; -use self::table::{BucketWithTable, FullBucketImm, RawTable, FullBucket, FullBucketMut, Bucket}; - -mod table { - use clone::Clone; - use cmp; - use hash::{Hash, Hasher}; - use iter::{Iterator, count}; - use mem::{min_align_of, size_of}; - use mem; - use num::{CheckedMul, is_power_of_two}; - use ops::{Deref, Drop}; - use option::{Some, None, Option}; - use ptr::RawPtr; - use ptr::set_memory; - use ptr::write; - use ptr; - use rt::heap::{allocate, deallocate}; - - static EMPTY_BUCKET: u64 = 0u64; - - /// The raw hashtable, providing 
safe-ish access to the unzipped and highly - /// optimized arrays of hashes, keys, and values. - /// - /// This design uses less memory and is a lot faster than the naive - /// `Vec<Option<u64, K, V>>`, because we don't pay for the overhead of an - /// option on every element, and we get a generally more cache-aware design. - /// - /// Key invariants of this structure: - /// - /// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have - /// 'undefined' contents. Don't read from them. This invariant is - /// enforced outside this module with the `EmptyIndex`, `FullIndex`, - /// and `SafeHash` types. - /// - /// - An `EmptyIndex` is only constructed for a bucket at an index with - /// a hash of EMPTY_BUCKET. - /// - /// - A `FullIndex` is only constructed for a bucket at an index with a - /// non-EMPTY_BUCKET hash. - /// - /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get - /// around hashes of zero by changing them to 0x8000_0000_0000_0000, - /// which will likely map to the same bucket, while not being confused - /// with "empty". - /// - /// - All three "arrays represented by pointers" are the same length: - /// `capacity`. This is set at creation and never changes. The arrays - /// are unzipped to save space (we don't have to pay for the padding - /// between odd sized elements, such as in a map from u64 to u8), and - /// be more cache aware (scanning through 8 hashes brings in 2 cache - /// lines, since they're all right beside each other). - /// - /// You can kind of think of this module/data structure as a safe wrapper - /// around just the "table" part of the hashtable. It enforces some - /// invariants at the type level and employs some performance trickery, - /// but in general is just a tricked out `Vec<Option<u64, K, V>>`. - /// - /// FIXME(cgaebel): - /// - /// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried, - /// isn't yet totally safe. 
There's a "known exploit" that you can create - /// multiple FullIndexes for a bucket, `take` one, and then still `take` - /// the other causing undefined behavior. Currently, there's no story - /// for how to protect against this statically. Therefore, there are asserts - /// on `take`, `get`, `get_mut`, and `put` which check the bucket state. - /// With time, and when we're confident this works correctly, they should - /// be removed. Also, the bounds check in `peek` is especially painful, - /// as that's called in the innermost loops of the hashtable and has the - /// potential to be a major performance drain. Remove this too. - /// - /// Or, better than remove, only enable these checks for debug builds. - /// There's currently no "debug-only" asserts in rust, so if you're reading - /// this and going "what? of course there are debug-only asserts!", then - /// please make this use them! - #[unsafe_no_drop_flag] - pub struct RawTable<K, V> { - capacity: uint, - size: uint, - hashes: *mut u64 - } - - /// A bucket that holds a reference to the table - pub trait BucketWithTable<M> { - /// A bucket that holds a reference to the table - fn table<'a>(&'a self) -> &'a M; - - /// Move out the reference to the table. - fn into_table(self) -> M; - - /// Get the raw index. 
- fn index(&self) -> uint; - } - - struct RawBucket<K, V> { - hash: *mut u64, - key: *mut K, - val: *mut V - } - - pub struct Bucket<K, V, M> { - raw: RawBucket<K, V>, - idx: uint, - table: M - } - - pub struct EmptyBucket<K, V, M> { - raw: RawBucket<K, V>, - idx: uint, - table: M - } - - pub struct FullBucket<K, V, M> { - raw: RawBucket<K, V>, - idx: uint, - table: M - } - - pub type EmptyBucketImm<'table,K,V> = EmptyBucket<K, V, &'table RawTable<K,V>>; - pub type FullBucketImm<'table,K,V> = FullBucket<K, V, &'table RawTable<K,V>>; - - pub type EmptyBucketMut<'table,K,V> = EmptyBucket<K, V, &'table mut RawTable<K,V>>; - pub type FullBucketMut<'table,K,V> = FullBucket<K, V, &'table mut RawTable<K,V>>; - - struct GapThenFull<K, V, M> { - gap: EmptyBucket<K, V, ()>, - full: FullBucket<K, V, M> - } - - impl<K, V, M: Deref<RawTable<K,V>>> GapThenFull<K, V, M> { - pub fn full<'a>(&'a self) -> &'a FullBucket<K, V, M> { - &self.full - } - - pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> { - unsafe { - *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); - mem::overwrite(self.gap.raw.key, ptr::read(self.full.raw.key as *const K)); - mem::overwrite(self.gap.raw.val, ptr::read(self.full.raw.val as *const V)); - } - - let FullBucket { raw, idx, .. 
} = self.full; - - match self.full.next().peek() { - Empty(_) => None, - Full(bucket) => { - self.gap.raw = raw; - self.gap.idx = idx; - - self.full = bucket; - self.full.idx &= self.full.table.capacity - 1; - - Some(self) - } - } - } - } - - impl<K, V> RawPtr<u64> for RawBucket<K, V> { - unsafe fn offset(self, count: int) -> RawBucket<K, V> { - RawBucket { - hash: self.hash.offset(count), - key: self.key.offset(count), - val: self.val.offset(count), - } - } - - fn null() -> RawBucket<K, V> { - RawBucket { - hash: RawPtr::null(), - key: RawPtr::null(), - val: RawPtr::null() - } - } - - fn is_null(&self) -> bool { - self.hash.is_null() - } - - fn to_uint(&self) -> uint { - self.hash.to_uint() - } - - unsafe fn to_option(&self) -> Option<&u64> { - self.hash.to_option() - } - } - - impl<K, V, M: Deref<RawTable<K,V>>> EmptyBucket<K, V, M> { - pub fn next(self) -> Bucket<K, V, M> { - let mut bucket = self.into_bucket(); - bucket.next(); - bucket - } - - pub fn into_bucket(self) -> Bucket<K, V, M> { - Bucket { - raw: self.raw, - idx: self.idx, - table: self.table - } - } - - pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> { - let gap = EmptyBucket { - raw: self.raw, - idx: self.idx, - table: () - }; - - match self.next().peek() { - Empty(_) => None, - Full(bucket) => { - Some(GapThenFull { - gap: gap, - full: bucket - }) - } - } - } - } - - impl<K, V, M: DerefMut<RawTable<K,V>>> EmptyBucket<K, V, M> { - pub fn put(mut self, hash: SafeHash, key: K, value: V) - -> FullBucket<K, V, M> { - unsafe { - *self.raw.hash = hash.inspect(); - write(self.raw.key, key); - write(self.raw.val, value); - } - - self.table.size += 1; - - FullBucket { raw: self.raw, idx: self.idx, table: self.table } - } - } - - impl<K, V, M: Deref<RawTable<K,V>>> FullBucket<K, V, M> { - pub fn next(self) -> Bucket<K, V, M> { - let mut bucket = self.into_bucket(); - bucket.next(); - bucket - } - - pub fn into_bucket(self) -> Bucket<K, V, M> { - Bucket { - raw: self.raw, - idx: self.idx, - table: 
self.table - } - } - - pub fn distance(&self) -> uint { - (self.idx - self.hash().inspect() as uint) & (self.table.capacity() - 1) - } - - pub fn hash(&self) -> SafeHash { - unsafe { - SafeHash { - hash: *self.raw.hash - } - } - } - - pub fn read<'a>(&'a self) -> (&'a K, &'a V) { - unsafe { - (&*self.raw.key, - &*self.raw.val) - } - } - - pub fn into_refs(self) -> (&K, &V) { - unsafe { - // debug_assert!(*self.raw.hash != EMPTY_BUCKET); - (&*self.raw.key, - &*self.raw.val) - } - } - } - - impl<K, V, M: DerefMut<RawTable<K,V>>> FullBucket<K, V, M> { - pub fn take(mut self) -> (EmptyBucket<K, V, M>, K, V) { - let key = self.raw.key as *const K; - let val = self.raw.val as *const V; - - self.table.size -= 1; - - unsafe { - *self.raw.hash = EMPTY_BUCKET; - ( - EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }, - ptr::read(key), - ptr::read(val) - ) - } - } - - pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { - unsafe { - let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); - let old_key = ptr::replace(self.raw.key, k); - let old_val = ptr::replace(self.raw.val, v); - - (old_hash, old_key, old_val) - } - } - - pub fn read_mut<'a>(&'a self) -> (&'a mut K, &'a mut V) { - unsafe { - // debug_assert!(*self.raw.hash != EMPTY_BUCKET); - (&mut *self.raw.key, - &mut *self.raw.val) - } - } - - pub fn into_mut_refs(self) -> (&mut K, &mut V) { - unsafe { - // debug_assert!(*self.raw.hash != EMPTY_BUCKET); - (&mut *self.raw.key, - &mut *self.raw.val) - } - } - } - - impl<K, V, M: Deref<RawTable<K,V>>> Bucket<K, V, M> { - pub fn new(table: M, hash: &SafeHash) -> Bucket<K, V, M> { - let ib_index = (hash.inspect() as uint) & (table.capacity() - 1); - Bucket { - raw: unsafe { - table.as_mut_ptrs().offset(ib_index as int) - }, - idx: ib_index, - table: table - } - } - - pub fn at_index(table: M, ib_index: uint) -> Bucket<K, V, M> { - let ib_index = ib_index & (table.capacity() - 1); - Bucket { - raw: unsafe { - 
table.as_mut_ptrs().offset(ib_index as int) - }, - idx: ib_index, - table: table - } - } - - pub fn first(table: M) -> Bucket<K, V, M> { - Bucket { - raw: table.as_mut_ptrs(), - idx: 0, - table: table - } - } - - pub fn peek(self) -> BucketState<K, V, M> { - match unsafe { *self.raw.hash } { - EMPTY_BUCKET => - Empty(EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }), - _ => - Full(FullBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }) - } - } - - pub fn next(&mut self) { - self.idx += 1; - - let dist = if self.idx == self.table.capacity() { - -(self.table.capacity() as int - 1) - } else { - 1i - }; - - unsafe { - self.raw = self.raw.offset(dist); - } - } - } - - impl<K, V, M> BucketWithTable<M> for FullBucket<K, V, M> { - fn table<'a>(&'a self) -> &'a M { - &self.table - } - - fn into_table(self) -> M { - self.table - } - - fn index(&self) -> uint { - self.idx - } - } - - impl<K, V, M> BucketWithTable<M> for EmptyBucket<K, V, M> { - fn table<'a>(&'a self) -> &'a M { - &self.table - } - - fn into_table(self) -> M { - self.table - } - - fn index(&self) -> uint { - self.idx - } - } - - impl<K, V, M> BucketWithTable<M> for Bucket<K, V, M> { - fn table<'a>(&'a self) -> &'a M { - &self.table - } - - fn into_table(self) -> M { - self.table - } - - fn index(&self) -> uint { - self.idx - } - } - - impl<'table,K,V> Deref<RawTable<K,V>> for &'table RawTable<K,V> { - fn deref<'a>(&'a self) -> &'a RawTable<K,V> { - &**self - } - } - - impl<'table,K,V> Deref<RawTable<K,V>> for &'table mut RawTable<K,V> { - fn deref<'a>(&'a self) -> &'a RawTable<K,V> { - &**self - } - } - - impl<'table,K,V> DerefMut<RawTable<K,V>> for &'table mut RawTable<K,V> { - fn deref_mut<'a>(&'a mut self) -> &'a mut RawTable<K,V> { - &mut **self - } - } - - pub enum BucketState<K, V, M> { - Empty(EmptyBucket<K, V, M>), - Full(FullBucket<K, V, M>), - } - - /// A hash that is not zero, since we use a hash of zero to represent empty - /// buckets. 
- #[deriving(PartialEq)] - pub struct SafeHash { - hash: u64, - } - - impl SafeHash { - /// Peek at the hash value, which is guaranteed to be non-zero. - #[inline(always)] - pub fn inspect(&self) -> u64 { self.hash } - } - - /// We need to remove hashes of 0. That's reserved for empty buckets. - /// This function wraps up `hash_keyed` to be the only way outside this - /// module to generate a SafeHash. - pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash { - match hasher.hash(t) { - // This constant is exceedingly likely to hash to the same - // bucket, but it won't be counted as empty! - EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 }, - h => SafeHash { hash: h }, - } - } - - fn round_up_to_next(unrounded: uint, target_alignment: uint) -> uint { - assert!(is_power_of_two(target_alignment)); - (unrounded + target_alignment - 1) & !(target_alignment - 1) - } - - #[test] - fn test_rounding() { - assert_eq!(round_up_to_next(0, 4), 0); - assert_eq!(round_up_to_next(1, 4), 4); - assert_eq!(round_up_to_next(2, 4), 4); - assert_eq!(round_up_to_next(3, 4), 4); - assert_eq!(round_up_to_next(4, 4), 4); - assert_eq!(round_up_to_next(5, 4), 8); - } - - // Returns a tuple of (minimum required malloc alignment, hash_offset, - // key_offset, val_offset, array_size), from the start of a mallocated array. 
- fn calculate_offsets( - hash_size: uint, hash_align: uint, - keys_size: uint, keys_align: uint, - vals_size: uint, vals_align: uint) -> (uint, uint, uint, uint, uint) { - - let hash_offset = 0; - let end_of_hashes = hash_offset + hash_size; - - let keys_offset = round_up_to_next(end_of_hashes, keys_align); - let end_of_keys = keys_offset + keys_size; - - let vals_offset = round_up_to_next(end_of_keys, vals_align); - let end_of_vals = vals_offset + vals_size; - - let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align)); - - (min_align, hash_offset, keys_offset, vals_offset, end_of_vals) - } - - #[test] - fn test_offset_calculation() { - assert_eq!(calculate_offsets(128, 8, 15, 1, 4, 4 ), (8, 0, 128, 144, 148)); - assert_eq!(calculate_offsets(3, 1, 2, 1, 1, 1 ), (1, 0, 3, 5, 6)); - assert_eq!(calculate_offsets(6, 2, 12, 4, 24, 8), (8, 0, 8, 24, 48)); - } - - impl<K, V> RawTable<K, V> { - - /// Does not initialize the buckets. The caller should ensure they, - /// at the very least, set every hash to EMPTY_BUCKET. - unsafe fn new_uninitialized(capacity: uint) -> RawTable<K, V> { - if capacity == 0 { - return RawTable { - size: 0, - capacity: 0, - hashes: 0 as *mut u64, - }; - } - let hashes_size = capacity.checked_mul(&size_of::<u64>()) - .expect("capacity overflow"); - let keys_size = capacity.checked_mul(&size_of::< K >()) - .expect("capacity overflow"); - let vals_size = capacity.checked_mul(&size_of::< V >()) - .expect("capacity overflow"); - - // Allocating hashmaps is a little tricky. We need to allocate three - // arrays, but since we know their sizes and alignments up front, - // we just allocate a single array, and then have the subarrays - // point into it. - // - // This is great in theory, but in practice getting the alignment - // right is a little subtle. Therefore, calculating offsets has been - // factored out into a different function. 
- let (malloc_alignment, hash_offset, _, _, size) = - calculate_offsets( - hashes_size, min_align_of::<u64>(), - keys_size, min_align_of::< K >(), - vals_size, min_align_of::< V >()); - - let buffer = allocate(size, malloc_alignment); - - let hashes = buffer.offset(hash_offset as int) as *mut u64; - - RawTable { - capacity: capacity, - size: 0, - hashes: hashes, - } - } - - fn as_mut_ptrs(&self) -> RawBucket<K, V> { - let hashes_size = self.capacity * size_of::<u64>(); - let keys_size = self.capacity * size_of::<K>(); - - let keys_offset = (hashes_size + min_align_of::< K >() - 1) & !(min_align_of::< K >() - 1); - let end_of_keys = keys_offset + keys_size; - - let vals_offset = (end_of_keys + min_align_of::< V >() - 1) & !(min_align_of::< V >() - 1); - - let buffer = self.hashes as *mut u8; - - unsafe { - RawBucket { - hash: self.hashes, - key: buffer.offset(keys_offset as int) as *mut K, - val: buffer.offset(vals_offset as int) as *mut V - } - } - } - - /// Creates a new raw table from a given capacity. All buckets are - /// initially empty. - #[allow(experimental)] - pub fn new(capacity: uint) -> RawTable<K, V> { - unsafe { - let ret = RawTable::new_uninitialized(capacity); - set_memory(ret.hashes, 0u8, capacity); - ret - } - } - - /// The hashtable's capacity, similar to a vector's. - pub fn capacity(&self) -> uint { - self.capacity - } - - /// The number of elements ever `put` in the hashtable, minus the number - /// of elements ever `take`n. 
- pub fn size(&self) -> uint { - self.size - } - - fn ptrs<'a>(&'a self) -> RawBuckets<'a, K, V> { - RawBuckets { - raw: self.as_mut_ptrs(), - hashes_end: unsafe { - self.hashes.offset(self.capacity as int) - } - } - } - - pub fn iter<'a>(&'a self) -> Entries<'a, K, V> { - Entries { - iter: self.ptrs(), - elems_left: self.size(), - } - } - - pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> { - MutEntries { - iter: self.ptrs(), - elems_left: self.size(), - } - } - - pub fn move_iter(self) -> MoveEntries<K, V> { - MoveEntries { - iter: self.ptrs(), - table: self, - } - } - - pub fn rev_move_buckets<'a>(&'a mut self) -> RevMoveBuckets<'a, K, V> { - let raw_bucket = self.as_mut_ptrs(); - unsafe { - RevMoveBuckets { - raw: raw_bucket.offset(self.capacity as int), - hashes_end: raw_bucket.hash, - elems_left: self.size - } - } - } - } - - pub struct RawBuckets<'a, K, V> { - raw: RawBucket<K, V>, - hashes_end: *mut u64 - } - - impl<'a, K, V> Iterator<RawBucket<K, V>> for RawBuckets<'a, K, V> { - fn next(&mut self) -> Option<RawBucket<K, V>> { - while self.raw.hash != self.hashes_end { - unsafe { - let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); - if *prev.hash != EMPTY_BUCKET { - return Some(prev); - } - } - } - - None - } - } - - pub struct RevMoveBuckets<'a, K, V> { - raw: RawBucket<K, V>, - hashes_end: *mut u64, - elems_left: uint - } - - impl<'a, K, V> Iterator<(K, V)> for RevMoveBuckets<'a, K, V> { - fn next(&mut self) -> Option<(K, V)> { - if self.elems_left == 0 { - return None; - } - - loop { - debug_assert!(self.raw.hash != self.hashes_end); - - unsafe { - self.raw = self.raw.offset(-1); - - if *self.raw.hash != EMPTY_BUCKET { - self.elems_left -= 1; - return Some(( - ptr::read(self.raw.key as *const K), - ptr::read(self.raw.val as *const V) - )); - } - } - } - } - } - - // `read_all_mut` casts a `*u64` to a `*SafeHash`. 
Since we statically - // ensure that a `FullIndex` points to an index with a non-zero hash, - // and a `SafeHash` is just a `u64` with a different name, this is - // safe. - // - // This test ensures that a `SafeHash` really IS the same size as a - // `u64`. If you need to change the size of `SafeHash` (and - // consequently made this test fail), `read_all_mut` needs to be - // modified to no longer assume this. - #[test] - fn can_alias_safehash_as_u64() { - assert_eq!(size_of::<SafeHash>(), size_of::<u64>()) - } - - /// Iterator over shared references to entries in a table. - pub struct Entries<'a, K:'a, V:'a> { - table: &'a RawTable<K, V>, - idx: uint, - elems_seen: uint, - } - - /// Iterator over mutable references to entries in a table. - pub struct MutEntries<'a, K:'a, V:'a> { - table: &'a mut RawTable<K, V>, - idx: uint, - elems_seen: uint, - } - - /// Iterator over the entries in a table, consuming the table. - pub struct MoveEntries<K, V> { - table: RawTable<K, V>, - iter: RawBuckets<'static, K, V> - } - - impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { - fn next(&mut self) -> Option<(&'a K, &'a V)> { - self.iter.next().map(|bucket| { - self.elems_left -= 1; - unsafe { - (&*bucket.key, - &*bucket.val) - } - }) - } - - fn size_hint(&self) -> (uint, Option<uint>) { - (self.elems_left, Some(self.elems_left)) - } - } - - impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { - fn next(&mut self) -> Option<(&'a K, &'a mut V)> { - self.iter.next().map(|bucket| { - self.elems_left -= 1; - unsafe { - (&*bucket.key, - &mut *bucket.val) - } - }) - } - - fn size_hint(&self) -> (uint, Option<uint>) { - (self.elems_left, Some(self.elems_left)) - } - } - - impl<K, V> Iterator<(SafeHash, K, V)> for MoveEntries<K, V> { - fn next(&mut self) -> Option<(SafeHash, K, V)> { - self.iter.next().map(|bucket| { - self.table.size -= 1; - unsafe { - ( - SafeHash { - hash: *bucket.hash, - }, - ptr::read(bucket.key as *const K), - ptr::read(bucket.val as 
*const V) - ) - } - }) - } - - fn size_hint(&self) -> (uint, Option<uint>) { - let size = self.table.size(); - (size, Some(size)) - } - } - - impl<K: Clone, V: Clone> Clone for RawTable<K, V> { - fn clone(&self) -> RawTable<K, V> { - unsafe { - let mut new_ht = RawTable::new_uninitialized(self.capacity()); - - { - let cap = self.capacity(); - let mut new_buckets = Bucket::first(&mut new_ht); - let mut buckets = Bucket::first(self); - while buckets.index() != cap { - match buckets.peek() { - Full(full) => { - let (h, k, v) = { - let (k, v) = full.read(); - (full.hash(), k.clone(), v.clone()) - }; - *new_buckets.raw.hash = h.inspect(); - mem::overwrite(new_buckets.raw.key, k); - mem::overwrite(new_buckets.raw.val, v); - } - _ => { - *new_buckets.raw.hash = EMPTY_BUCKET; - } - } - new_buckets.next(); - buckets.next(); - } - } - - new_ht.size = self.size(); - - new_ht - } - } - } - - #[unsafe_destructor] - impl<K, V> Drop for RawTable<K, V> { - fn drop(&mut self) { - if self.hashes.is_null() { - return; - } - // This is in reverse because we're likely to have partially taken - // some elements out with `.move_iter()` from the front. - // Check if the size is 0, so we don't do a useless scan when - // dropping empty tables such as on resize. - // Avoid double free of elements already moved out. - for _ in self.rev_move_buckets() {} - - let hashes_size = self.capacity * size_of::<u64>(); - let keys_size = self.capacity * size_of::<K>(); - let vals_size = self.capacity * size_of::<V>(); - let (align, _, _, _, size) = calculate_offsets(hashes_size, min_align_of::<u64>(), - keys_size, min_align_of::<K>(), - vals_size, min_align_of::<V>()); - - unsafe { - deallocate(self.hashes as *mut u8, size, align); - // Remember how everything was allocated out of one buffer - // during initialization? We only need one call to free here. 
- } - - self.hashes = RawPtr::null(); - } - } -} +use super::table::{BucketWithTable, FullBucketImm, RawTable, FullBucket, FullBucketMut, Bucket}; +use super::table; static INITIAL_LOG2_CAP: uint = 5; -static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5 +pub static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5 /// The default behavior of HashMap implements a load factor of 90.9%. /// This behavior is characterized by the following conditions: @@ -1283,7 +429,7 @@ impl<K: Hash + Eq, V> HashMap<K, V, RandomSipHasher> { /// /// ``` /// use std::collections::HashMap; - /// let mut map: HashMap<&str, int> = HashMap::new(); + /// let mut map: HashMap<&str, int> = HashMap::with_capacity(10); /// ``` #[inline] pub fn new() -> HashMap<K, V, RandomSipHasher> { @@ -1365,6 +511,8 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> { /// This function has no effect on the operational semantics of the /// hashtable, only on performance. /// + /// # Example + /// /// ``` /// use std::collections::HashMap; /// let mut map: HashMap<&str, int> = HashMap::new(); @@ -1774,9 +922,9 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> { /// /// # Example /// - /// This is a slightly silly example where we define the number's parity as - /// the equivalence class. It is important that the values hash the same, - /// which is why we override `Hash`. + /// This is a slightly silly example where we define the number's + /// parity as the equivalence class. It is important that the + /// values hash the same, which is why we implement `Hash`. 
/// /// ``` /// use std::collections::HashMap; @@ -2064,435 +1212,6 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S> + Default> Extendable<(K, V)> for HashM } } -/// HashSet iterator -pub type SetItems<'a, K> = - iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>; - -/// HashSet move iterator -pub type SetMoveItems<K> = - iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>; - -/// An implementation of a hash set using the underlying representation of a -/// HashMap where the value is (). As with the `HashMap` type, a `HashSet` -/// requires that the elements implement the `Eq` and `Hash` traits. -/// -/// # Example -/// -/// ``` -/// use std::collections::HashSet; -/// -/// // Type inference lets us omit an explicit type signature (which -/// // would be `HashSet<&str>` in this example). -/// let mut books = HashSet::new(); -/// -/// // Add some books. -/// books.insert("A Dance With Dragons"); -/// books.insert("To Kill a Mockingbird"); -/// books.insert("The Odyssey"); -/// books.insert("The Great Gatsby"); -/// -/// // Check for a specific one. -/// if !books.contains(&("The Winds of Winter")) { -/// println!("We have {} books, but The Winds of Winter ain't one.", -/// books.len()); -/// } -/// -/// // Remove a book. -/// books.remove(&"The Odyssey"); -/// -/// // Iterate over everything. -/// for book in books.iter() { -/// println!("{}", *book); -/// } -/// ``` -/// -/// The easiest way to use `HashSet` with a custom type is to derive -/// `Eq` and `Hash`. We must also derive `PartialEq`, this will in the -/// future be implied by `Eq`. 
-/// -/// ```rust -/// use std::collections::HashSet; -/// -/// #[deriving(Hash, Eq, PartialEq, Show)] -/// struct Viking<'a> { -/// name: &'a str, -/// power: uint, -/// } -/// -/// let mut vikings = HashSet::new(); -/// -/// vikings.insert(Viking { name: "Einar", power: 9u }); -/// vikings.insert(Viking { name: "Einar", power: 9u }); -/// vikings.insert(Viking { name: "Olaf", power: 4u }); -/// vikings.insert(Viking { name: "Harald", power: 8u }); -/// -/// // Use derived implementation to print the vikings. -/// for x in vikings.iter() { -/// println!("{}", x); -/// } -/// ``` -#[deriving(Clone)] -pub struct HashSet<T, H = RandomSipHasher> { - map: HashMap<T, (), H> -} - -impl<T: Hash + Eq> HashSet<T, RandomSipHasher> { - /// Create an empty HashSet. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set: HashSet<int> = HashSet::new(); - /// ``` - #[inline] - pub fn new() -> HashSet<T, RandomSipHasher> { - HashSet::with_capacity(INITIAL_CAPACITY) - } - - /// Create an empty HashSet with space for at least `n` elements in - /// the hash table. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set: HashSet<int> = HashSet::with_capacity(10); - /// ``` - #[inline] - pub fn with_capacity(capacity: uint) -> HashSet<T, RandomSipHasher> { - HashSet { map: HashMap::with_capacity(capacity) } - } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> { - /// Creates a new empty hash set which will use the given hasher to hash - /// keys. - /// - /// The hash set is also created with the default initial capacity. 
- /// - /// # Example - /// - /// ```rust - /// use std::collections::HashSet; - /// use std::hash::sip::SipHasher; - /// - /// let h = SipHasher::new(); - /// let mut set = HashSet::with_hasher(h); - /// set.insert(2u); - /// ``` - #[inline] - pub fn with_hasher(hasher: H) -> HashSet<T, H> { - HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher) - } - - /// Create an empty HashSet with space for at least `capacity` - /// elements in the hash table, using `hasher` to hash the keys. - /// - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow `HashSet`s to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. - /// - /// # Example - /// - /// ```rust - /// use std::collections::HashSet; - /// use std::hash::sip::SipHasher; - /// - /// let h = SipHasher::new(); - /// let mut set = HashSet::with_capacity_and_hasher(10u, h); - /// set.insert(1i); - /// ``` - #[inline] - pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> { - HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) } - } - - /// Reserve space for at least `n` elements in the hash table. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set: HashSet<int> = HashSet::new(); - /// set.reserve(10); - /// ``` - pub fn reserve(&mut self, n: uint) { - self.map.reserve(n) - } - - /// Returns true if the hash set contains a value equivalent to the - /// given query value. - /// - /// # Example - /// - /// This is a slightly silly example where we define the number's - /// parity as the equivalence class. It is important that the - /// values hash the same, which is why we implement `Hash`. 
- /// - /// ```rust - /// use std::collections::HashSet; - /// use std::hash::Hash; - /// use std::hash::sip::SipState; - /// - /// #[deriving(Eq, PartialEq)] - /// struct EvenOrOdd { - /// num: uint - /// }; - /// - /// impl Hash for EvenOrOdd { - /// fn hash(&self, state: &mut SipState) { - /// let parity = self.num % 2; - /// parity.hash(state); - /// } - /// } - /// - /// impl Equiv<EvenOrOdd> for EvenOrOdd { - /// fn equiv(&self, other: &EvenOrOdd) -> bool { - /// self.num % 2 == other.num % 2 - /// } - /// } - /// - /// let mut set = HashSet::new(); - /// set.insert(EvenOrOdd { num: 3u }); - /// - /// assert!(set.contains_equiv(&EvenOrOdd { num: 3u })); - /// assert!(set.contains_equiv(&EvenOrOdd { num: 5u })); - /// assert!(!set.contains_equiv(&EvenOrOdd { num: 4u })); - /// assert!(!set.contains_equiv(&EvenOrOdd { num: 2u })); - /// - /// ``` - pub fn contains_equiv<Q: Hash<S> + Equiv<T>>(&self, value: &Q) -> bool { - self.map.contains_key_equiv(value) - } - - /// An iterator visiting all elements in arbitrary order. - /// Iterator element type is &'a T. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set = HashSet::new(); - /// set.insert("a"); - /// set.insert("b"); - /// - /// // Will print in an arbitrary order. - /// for x in set.iter() { - /// println!("{}", x); - /// } - /// ``` - pub fn iter<'a>(&'a self) -> SetItems<'a, T> { - self.map.keys() - } - - /// Creates a consuming iterator, that is, one that moves each value out - /// of the set in arbitrary order. The set cannot be used after calling - /// this. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set = HashSet::new(); - /// set.insert("a".to_string()); - /// set.insert("b".to_string()); - /// - /// // Not possible to collect to a Vec<String> with a regular `.iter()`. - /// let v: Vec<String> = set.move_iter().collect(); - /// - /// // Will print in an arbitrary order. 
- /// for x in v.iter() { - /// println!("{}", x); - /// } - /// ``` - pub fn move_iter(self) -> SetMoveItems<T> { - self.map.move_iter().map(|(k, _)| k) - } - - /// Visit the values representing the difference. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let a: HashSet<int> = [1i, 2, 3].iter().map(|&x| x).collect(); - /// let b: HashSet<int> = [4i, 2, 3, 4].iter().map(|&x| x).collect(); - /// - /// // Can be seen as `a - b`. - /// for x in a.difference(&b) { - /// println!("{}", x); // Print 1 - /// } - /// - /// let diff: HashSet<int> = a.difference(&b).map(|&x| x).collect(); - /// assert_eq!(diff, [1i].iter().map(|&x| x).collect()); - /// - /// // Note that difference is not symmetric, - /// // and `b - a` means something else: - /// let diff: HashSet<int> = b.difference(&a).map(|&x| x).collect(); - /// assert_eq!(diff, [4i].iter().map(|&x| x).collect()); - /// ``` - pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> { - Repeat::new(other).zip(self.iter()) - .filter_map(|(other, elt)| { - if !other.contains(elt) { Some(elt) } else { None } - }) - } - - /// Visit the values representing the symmetric difference. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let a: HashSet<int> = [1i, 2, 3].iter().map(|&x| x).collect(); - /// let b: HashSet<int> = [4i, 2, 3, 4].iter().map(|&x| x).collect(); - /// - /// // Print 1, 4 in arbitrary order. 
- /// for x in a.symmetric_difference(&b) { - /// println!("{}", x); - /// } - /// - /// let diff1: HashSet<int> = a.symmetric_difference(&b).map(|&x| x).collect(); - /// let diff2: HashSet<int> = b.symmetric_difference(&a).map(|&x| x).collect(); - /// - /// assert_eq!(diff1, diff2); - /// assert_eq!(diff1, [1i, 4].iter().map(|&x| x).collect()); - /// ``` - pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet<T, H>) - -> Chain<SetAlgebraItems<'a, T, H>, SetAlgebraItems<'a, T, H>> { - self.difference(other).chain(other.difference(self)) - } - - /// Visit the values representing the intersection. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let a: HashSet<int> = [1i, 2, 3].iter().map(|&x| x).collect(); - /// let b: HashSet<int> = [4i, 2, 3, 4].iter().map(|&x| x).collect(); - /// - /// // Print 2, 3 in arbitrary order. - /// for x in a.intersection(&b) { - /// println!("{}", x); - /// } - /// - /// let diff: HashSet<int> = a.intersection(&b).map(|&x| x).collect(); - /// assert_eq!(diff, [2i, 3].iter().map(|&x| x).collect()); - /// ``` - pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>) - -> SetAlgebraItems<'a, T, H> { - Repeat::new(other).zip(self.iter()) - .filter_map(|(other, elt)| { - if other.contains(elt) { Some(elt) } else { None } - }) - } - - /// Visit the values representing the union. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let a: HashSet<int> = [1i, 2, 3].iter().map(|&x| x).collect(); - /// let b: HashSet<int> = [4i, 2, 3, 4].iter().map(|&x| x).collect(); - /// - /// // Print 1, 2, 3, 4 in arbitrary order. 
- /// for x in a.union(&b) { - /// println!("{}", x); - /// } - /// - /// let diff: HashSet<int> = a.union(&b).map(|&x| x).collect(); - /// assert_eq!(diff, [1i, 2, 3, 4].iter().map(|&x| x).collect()); - /// ``` - pub fn union<'a>(&'a self, other: &'a HashSet<T, H>) - -> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> { - self.iter().chain(other.difference(self)) - } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S>> PartialEq for HashSet<T, H> { - fn eq(&self, other: &HashSet<T, H>) -> bool { - if self.len() != other.len() { return false; } - - self.iter().all(|key| other.contains(key)) - } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {} - -impl<T: Eq + Hash<S>, S, H: Hasher<S>> Collection for HashSet<T, H> { - fn len(&self) -> uint { self.map.len() } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S>> Mutable for HashSet<T, H> { - fn clear(&mut self) { self.map.clear() } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> { - fn contains(&self, value: &T) -> bool { self.map.contains_key(value) } - - fn is_disjoint(&self, other: &HashSet<T, H>) -> bool { - self.iter().all(|v| !other.contains(v)) - } - - fn is_subset(&self, other: &HashSet<T, H>) -> bool { - self.iter().all(|v| other.contains(v)) - } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S>> MutableSet<T> for HashSet<T, H> { - fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) } - - fn remove(&mut self, value: &T) -> bool { self.map.remove(value) } -} - - -impl<T: Eq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(write!(f, "{{")); - - for (i, x) in self.iter().enumerate() { - if i != 0 { try!(write!(f, ", ")); } - try!(write!(f, "{}", *x)); - } - - write!(f, "}}") - } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S> + Default> FromIterator<T> for HashSet<T, H> { - fn from_iter<I: Iterator<T>>(iter: I) -> HashSet<T, H> { - let (lower, _) = iter.size_hint(); - let mut set = 
HashSet::with_capacity_and_hasher(lower, Default::default()); - set.extend(iter); - set - } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S> + Default> Extendable<T> for HashSet<T, H> { - fn extend<I: Iterator<T>>(&mut self, mut iter: I) { - for k in iter { - self.insert(k); - } - } -} - -impl<T: Eq + Hash<S>, S, H: Hasher<S> + Default> Default for HashSet<T, H> { - fn default() -> HashSet<T, H> { - HashSet::with_hasher(Default::default()) - } -} - -// `Repeat` is used to feed the filter closure an explicit capture -// of a reference to the other set -/// Set operations iterator -pub type SetAlgebraItems<'a, T, H> = - FilterMap<'static, (&'a HashSet<T, H>, &'a T), &'a T, - Zip<Repeat<&'a HashSet<T, H>>, SetItems<'a, T>>>; - #[cfg(test)] mod test_map { use prelude::*; @@ -3084,369 +1803,3 @@ mod test_map { map[4]; } } - -#[cfg(test)] -mod test_set { - use prelude::*; - - use super::HashSet; - use slice::ImmutablePartialEqSlice; - use collections::Collection; - - #[test] - fn test_disjoint() { - let mut xs = HashSet::new(); - let mut ys = HashSet::new(); - assert!(xs.is_disjoint(&ys)); - assert!(ys.is_disjoint(&xs)); - assert!(xs.insert(5i)); - assert!(ys.insert(11i)); - assert!(xs.is_disjoint(&ys)); - assert!(ys.is_disjoint(&xs)); - assert!(xs.insert(7)); - assert!(xs.insert(19)); - assert!(xs.insert(4)); - assert!(ys.insert(2)); - assert!(ys.insert(-11)); - assert!(xs.is_disjoint(&ys)); - assert!(ys.is_disjoint(&xs)); - assert!(ys.insert(7)); - assert!(!xs.is_disjoint(&ys)); - assert!(!ys.is_disjoint(&xs)); - } - - #[test] - fn test_subset_and_superset() { - let mut a = HashSet::new(); - assert!(a.insert(0i)); - assert!(a.insert(5)); - assert!(a.insert(11)); - assert!(a.insert(7)); - - let mut b = HashSet::new(); - assert!(b.insert(0i)); - assert!(b.insert(7)); - assert!(b.insert(19)); - assert!(b.insert(250)); - assert!(b.insert(11)); - assert!(b.insert(200)); - - assert!(!a.is_subset(&b)); - assert!(!a.is_superset(&b)); - assert!(!b.is_subset(&a)); - 
assert!(!b.is_superset(&a)); - - assert!(b.insert(5)); - - assert!(a.is_subset(&b)); - assert!(!a.is_superset(&b)); - assert!(!b.is_subset(&a)); - assert!(b.is_superset(&a)); - } - - #[test] - fn test_iterate() { - let mut a = HashSet::new(); - for i in range(0u, 32) { - assert!(a.insert(i)); - } - let mut observed: u32 = 0; - for k in a.iter() { - observed |= 1 << *k; - } - assert_eq!(observed, 0xFFFF_FFFF); - } - - #[test] - fn test_intersection() { - let mut a = HashSet::new(); - let mut b = HashSet::new(); - - assert!(a.insert(11i)); - assert!(a.insert(1)); - assert!(a.insert(3)); - assert!(a.insert(77)); - assert!(a.insert(103)); - assert!(a.insert(5)); - assert!(a.insert(-5)); - - assert!(b.insert(2i)); - assert!(b.insert(11)); - assert!(b.insert(77)); - assert!(b.insert(-9)); - assert!(b.insert(-42)); - assert!(b.insert(5)); - assert!(b.insert(3)); - - let mut i = 0; - let expected = [3, 5, 11, 77]; - for x in a.intersection(&b) { - assert!(expected.contains(x)); - i += 1 - } - assert_eq!(i, expected.len()); - } - - #[test] - fn test_difference() { - let mut a = HashSet::new(); - let mut b = HashSet::new(); - - assert!(a.insert(1i)); - assert!(a.insert(3)); - assert!(a.insert(5)); - assert!(a.insert(9)); - assert!(a.insert(11)); - - assert!(b.insert(3i)); - assert!(b.insert(9)); - - let mut i = 0; - let expected = [1, 5, 11]; - for x in a.difference(&b) { - assert!(expected.contains(x)); - i += 1 - } - assert_eq!(i, expected.len()); - } - - #[test] - fn test_symmetric_difference() { - let mut a = HashSet::new(); - let mut b = HashSet::new(); - - assert!(a.insert(1i)); - assert!(a.insert(3)); - assert!(a.insert(5)); - assert!(a.insert(9)); - assert!(a.insert(11)); - - assert!(b.insert(-2i)); - assert!(b.insert(3)); - assert!(b.insert(9)); - assert!(b.insert(14)); - assert!(b.insert(22)); - - let mut i = 0; - let expected = [-2, 1, 5, 11, 14, 22]; - for x in a.symmetric_difference(&b) { - assert!(expected.contains(x)); - i += 1 - } - assert_eq!(i, 
expected.len()); - } - - #[test] - fn test_union() { - let mut a = HashSet::new(); - let mut b = HashSet::new(); - - assert!(a.insert(1i)); - assert!(a.insert(3)); - assert!(a.insert(5)); - assert!(a.insert(9)); - assert!(a.insert(11)); - assert!(a.insert(16)); - assert!(a.insert(19)); - assert!(a.insert(24)); - - assert!(b.insert(-2i)); - assert!(b.insert(1)); - assert!(b.insert(5)); - assert!(b.insert(9)); - assert!(b.insert(13)); - assert!(b.insert(19)); - - let mut i = 0; - let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24]; - for x in a.union(&b) { - assert!(expected.contains(x)); - i += 1 - } - assert_eq!(i, expected.len()); - } - - #[test] - fn test_from_iter() { - let xs = [1i, 2, 3, 4, 5, 6, 7, 8, 9]; - - let set: HashSet<int> = xs.iter().map(|&x| x).collect(); - - for x in xs.iter() { - assert!(set.contains(x)); - } - } - - #[test] - fn test_move_iter() { - let hs = { - let mut hs = HashSet::new(); - - hs.insert('a'); - hs.insert('b'); - - hs - }; - - let v = hs.move_iter().collect::<Vec<char>>(); - assert!(['a', 'b'] == v.as_slice() || ['b', 'a'] == v.as_slice()); - } - - #[test] - fn test_eq() { - // These constants once happened to expose a bug in insert(). - // I'm keeping them around to prevent a regression. 
- let mut s1 = HashSet::new(); - - s1.insert(1i); - s1.insert(2); - s1.insert(3); - - let mut s2 = HashSet::new(); - - s2.insert(1i); - s2.insert(2); - - assert!(s1 != s2); - - s2.insert(3); - - assert_eq!(s1, s2); - } - - #[test] - fn test_show() { - let mut set: HashSet<int> = HashSet::new(); - let empty: HashSet<int> = HashSet::new(); - - set.insert(1i); - set.insert(2); - - let set_str = format!("{}", set); - - assert!(set_str == "{1, 2}".to_string() || set_str == "{2, 1}".to_string()); - assert_eq!(format!("{}", empty), "{}".to_string()); - } -} - -#[cfg(test)] -mod bench { - extern crate test; - use prelude::*; - - use self::test::Bencher; - use iter::{range_inclusive}; - - #[bench] - fn new_drop(b : &mut Bencher) { - use super::HashMap; - - b.iter(|| { - let m : HashMap<int, int> = HashMap::new(); - assert_eq!(m.len(), 0); - }) - } - - #[bench] - fn new_insert_drop(b : &mut Bencher) { - use super::HashMap; - - b.iter(|| { - let mut m = HashMap::new(); - m.insert(0i, 0i); - assert_eq!(m.len(), 1); - }) - } - - #[bench] - fn insert(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - let mut k = 1001; - - b.iter(|| { - m.insert(k, k); - k += 1; - }); - } - - #[bench] - fn find_existing(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - b.iter(|| { - for i in range_inclusive(1i, 1000) { - m.contains_key(&i); - } - }); - } - - #[bench] - fn find_nonexisting(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - b.iter(|| { - for i in range_inclusive(1001i, 2000) { - m.contains_key(&i); - } - }); - } - - #[bench] - fn hashmap_as_queue(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - let mut k = 1i; - - b.iter(|| { - 
m.pop(&k); - m.insert(k + 1000, k + 1000); - k += 1; - }); - } - - #[bench] - fn find_pop_insert(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - let mut k = 1i; - - b.iter(|| { - m.find(&(k + 400)); - m.find(&(k + 2000)); - m.pop(&k); - m.insert(k + 1000, k + 1000); - k += 1; - }) - } -} diff --git a/src/libstd/collections/hashmap/mod.rs b/src/libstd/collections/hashmap/mod.rs new file mode 100644 index 00000000000..f493e844526 --- /dev/null +++ b/src/libstd/collections/hashmap/mod.rs @@ -0,0 +1,27 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Unordered containers, implemented as hash-tables + +pub use self::map::HashMap; +pub use self::map::Entries; +pub use self::map::MoveEntries; +pub use self::map::Keys; +pub use self::map::Values; +pub use self::map::INITIAL_CAPACITY; +pub use self::set::HashSet; +pub use self::set::SetItems; +pub use self::set::SetMoveItems; +pub use self::set::SetAlgebraItems; + +mod bench; +mod map; +mod set; +mod table; diff --git a/src/libstd/collections/hashmap/set.rs b/src/libstd/collections/hashmap/set.rs new file mode 100644 index 00000000000..a1f71e33303 --- /dev/null +++ b/src/libstd/collections/hashmap/set.rs @@ -0,0 +1,696 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +// +// ignore-lexer-test FIXME #15883 + +use clone::Clone; +use cmp::{Eq, Equiv, PartialEq}; +use collections::{Collection, Mutable, Set, MutableSet, Map, MutableMap}; +use default::Default; +use fmt::Show; +use fmt; +use RandomSipHasher; +use hash::{Hash, Hasher}; +use iter::{Iterator, FromIterator, FilterMap, Chain, Repeat, Zip, Extendable}; +use iter; +use option::{Some, None}; +use result::{Ok, Err}; + +use super::{HashMap, Entries, MoveEntries, INITIAL_CAPACITY}; + +/// HashSet iterator +pub type SetItems<'a, K> = + iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>; + +/// HashSet move iterator +pub type SetMoveItems<K> = + iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>; + +/// An implementation of a hash set using the underlying representation of a +/// HashMap where the value is (). As with the `HashMap` type, a `HashSet` +/// requires that the elements implement the `Eq` and `Hash` traits. +/// +/// # Example +/// +/// ``` +/// use std::collections::HashSet; +/// // Type inference lets us omit an explicit type signature (which +/// // would be `HashSet<&str>` in this example). +/// let mut books = HashSet::new(); +/// +/// // Add some books. +/// books.insert("A Dance With Dragons"); +/// books.insert("To Kill a Mockingbird"); +/// books.insert("The Odyssey"); +/// books.insert("The Great Gatsby"); +/// +/// // Check for a specific one. +/// if !books.contains(&("The Winds of Winter")) { +/// println!("We have {} books, but The Winds of Winter ain't one.", +/// books.len()); +/// } +/// +/// // Remove a book. +/// books.remove(&"The Odyssey"); +/// +/// // Iterate over everything. 
+/// for book in books.iter() { +/// println!("{}", *book); +/// } +/// ``` +/// +/// The easiest way to use `HashSet` with a custom type is to derive +/// `Eq` and `Hash`. We must also derive `PartialEq`, this will in the +/// future be implied by `Eq`. +/// +/// ``` +/// use std::collections::HashSet; +/// #[deriving(Hash, Eq, PartialEq, Show)] +/// struct Viking<'a> { +/// name: &'a str, +/// power: uint, +/// } +/// +/// let mut vikings = HashSet::new(); +/// +/// vikings.insert(Viking { name: "Einar", power: 9u }); +/// vikings.insert(Viking { name: "Einar", power: 9u }); +/// vikings.insert(Viking { name: "Olaf", power: 4u }); +/// vikings.insert(Viking { name: "Harald", power: 8u }); +/// +/// // Use derived implementation to print the vikings. +/// for x in vikings.iter() { +/// println!("{}", x); +/// } +/// ``` +#[deriving(Clone)] +pub struct HashSet<T, H = RandomSipHasher> { + map: HashMap<T, (), H> +} + +impl<T: Hash + Eq> HashSet<T, RandomSipHasher> { + /// Create an empty HashSet. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet<int> = HashSet::new(); + /// ``` + #[inline] + pub fn new() -> HashSet<T, RandomSipHasher> { + HashSet::with_capacity(INITIAL_CAPACITY) + } + + /// Create an empty HashSet with space for at least `n` elements in + /// the hash table. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet<int> = HashSet::with_capacity(10); + /// ``` + #[inline] + pub fn with_capacity(capacity: uint) -> HashSet<T, RandomSipHasher> { + HashSet { map: HashMap::with_capacity(capacity) } + } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> { + /// Creates a new empty hash set which will use the given hasher to hash + /// keys. + /// + /// The hash set is also created with the default initial capacity. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// use std::hash::sip::SipHasher; + /// + /// let h = SipHasher::new(); + /// let mut set = HashSet::with_hasher(h); + /// set.insert(2u); + /// ``` + #[inline] + pub fn with_hasher(hasher: H) -> HashSet<T, H> { + HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher) + } + + /// Create an empty HashSet with space for at least `capacity` + /// elements in the hash table, using `hasher` to hash the keys. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// use std::hash::sip::SipHasher; + /// + /// let h = SipHasher::new(); + /// let mut set = HashSet::with_capacity_and_hasher(10u, h); + /// set.insert(1i); + /// ``` + #[inline] + pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> { + HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) } + } + + /// Reserve space for at least `n` elements in the hash table. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet<int> = HashSet::new(); + /// set.reserve(10); + /// ``` + pub fn reserve(&mut self, n: uint) { + self.map.reserve(n) + } + + /// Returns true if the hash set contains a value equivalent to the + /// given query value. + /// + /// # Example + /// + /// This is a slightly silly example where we define the number's + /// parity as the equivalence class. It is important that the + /// values hash the same, which is why we implement `Hash`. 
+ /// + /// ``` + /// use std::collections::HashSet; + /// use std::hash::Hash; + /// use std::hash::sip::SipState; + /// + /// #[deriving(Eq, PartialEq)] + /// struct EvenOrOdd { + /// num: uint + /// }; + /// + /// impl Hash for EvenOrOdd { + /// fn hash(&self, state: &mut SipState) { + /// let parity = self.num % 2; + /// parity.hash(state); + /// } + /// } + /// + /// impl Equiv<EvenOrOdd> for EvenOrOdd { + /// fn equiv(&self, other: &EvenOrOdd) -> bool { + /// self.num % 2 == other.num % 2 + /// } + /// } + /// + /// let mut set = HashSet::new(); + /// set.insert(EvenOrOdd { num: 3u }); + /// + /// assert!(set.contains_equiv(&EvenOrOdd { num: 3u })); + /// assert!(set.contains_equiv(&EvenOrOdd { num: 5u })); + /// assert!(!set.contains_equiv(&EvenOrOdd { num: 4u })); + /// assert!(!set.contains_equiv(&EvenOrOdd { num: 2u })); + /// + /// ``` + pub fn contains_equiv<Q: Hash<S> + Equiv<T>>(&self, value: &Q) -> bool { + self.map.contains_key_equiv(value) + } + + /// An iterator visiting all elements in arbitrary order. + /// Iterator element type is &'a T. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set = HashSet::new(); + /// set.insert("a"); + /// set.insert("b"); + /// + /// // Will print in an arbitrary order. + /// for x in set.iter() { + /// println!("{}", x); + /// } + /// ``` + pub fn iter<'a>(&'a self) -> SetItems<'a, T> { + self.map.keys() + } + + /// Creates a consuming iterator, that is, one that moves each value out + /// of the set in arbitrary order. The set cannot be used after calling + /// this. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set = HashSet::new(); + /// set.insert("a".to_string()); + /// set.insert("b".to_string()); + /// + /// // Not possible to collect to a Vec<String> with a regular `.iter()`. + /// let v: Vec<String> = set.move_iter().collect(); + /// + /// // Will print in an arbitrary order. 
+ /// for x in v.iter() { + /// println!("{}", x); + /// } + /// ``` + pub fn move_iter(self) -> SetMoveItems<T> { + self.map.move_iter().map(|(k, _)| k) + } + + /// Visit the values representing the difference. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<int> = [1i, 2, 3].iter().map(|&x| x).collect(); + /// let b: HashSet<int> = [4i, 2, 3, 4].iter().map(|&x| x).collect(); + /// + /// // Can be seen as `a - b`. + /// for x in a.difference(&b) { + /// println!("{}", x); // Print 1 + /// } + /// + /// let diff: HashSet<int> = a.difference(&b).map(|&x| x).collect(); + /// assert_eq!(diff, [1i].iter().map(|&x| x).collect()); + /// + /// // Note that difference is not symmetric, + /// // and `b - a` means something else: + /// let diff: HashSet<int> = b.difference(&a).map(|&x| x).collect(); + /// assert_eq!(diff, [4i].iter().map(|&x| x).collect()); + /// ``` + pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> { + Repeat::new(other).zip(self.iter()) + .filter_map(|(other, elt)| { + if !other.contains(elt) { Some(elt) } else { None } + }) + } + + /// Visit the values representing the symmetric difference. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<int> = [1i, 2, 3].iter().map(|&x| x).collect(); + /// let b: HashSet<int> = [4i, 2, 3, 4].iter().map(|&x| x).collect(); + /// + /// // Print 1, 4 in arbitrary order. 
+ /// for x in a.symmetric_difference(&b) { + /// println!("{}", x); + /// } + /// + /// let diff1: HashSet<int> = a.symmetric_difference(&b).map(|&x| x).collect(); + /// let diff2: HashSet<int> = b.symmetric_difference(&a).map(|&x| x).collect(); + /// + /// assert_eq!(diff1, diff2); + /// assert_eq!(diff1, [1i, 4].iter().map(|&x| x).collect()); + /// ``` + pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet<T, H>) + -> Chain<SetAlgebraItems<'a, T, H>, SetAlgebraItems<'a, T, H>> { + self.difference(other).chain(other.difference(self)) + } + + /// Visit the values representing the intersection. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<int> = [1i, 2, 3].iter().map(|&x| x).collect(); + /// let b: HashSet<int> = [4i, 2, 3, 4].iter().map(|&x| x).collect(); + /// + /// // Print 2, 3 in arbitrary order. + /// for x in a.intersection(&b) { + /// println!("{}", x); + /// } + /// + /// let diff: HashSet<int> = a.intersection(&b).map(|&x| x).collect(); + /// assert_eq!(diff, [2i, 3].iter().map(|&x| x).collect()); + /// ``` + pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>) + -> SetAlgebraItems<'a, T, H> { + Repeat::new(other).zip(self.iter()) + .filter_map(|(other, elt)| { + if other.contains(elt) { Some(elt) } else { None } + }) + } + + /// Visit the values representing the union. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<int> = [1i, 2, 3].iter().map(|&x| x).collect(); + /// let b: HashSet<int> = [4i, 2, 3, 4].iter().map(|&x| x).collect(); + /// + /// // Print 1, 2, 3, 4 in arbitrary order. 
+ /// for x in a.union(&b) { + /// println!("{}", x); + /// } + /// + /// let diff: HashSet<int> = a.union(&b).map(|&x| x).collect(); + /// assert_eq!(diff, [1i, 2, 3, 4].iter().map(|&x| x).collect()); + /// ``` + pub fn union<'a>(&'a self, other: &'a HashSet<T, H>) + -> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> { + self.iter().chain(other.difference(self)) + } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S>> PartialEq for HashSet<T, H> { + fn eq(&self, other: &HashSet<T, H>) -> bool { + if self.len() != other.len() { return false; } + + self.iter().all(|key| other.contains(key)) + } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {} + +impl<T: Eq + Hash<S>, S, H: Hasher<S>> Collection for HashSet<T, H> { + fn len(&self) -> uint { self.map.len() } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S>> Mutable for HashSet<T, H> { + fn clear(&mut self) { self.map.clear() } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> { + fn contains(&self, value: &T) -> bool { self.map.contains_key(value) } + + fn is_disjoint(&self, other: &HashSet<T, H>) -> bool { + self.iter().all(|v| !other.contains(v)) + } + + fn is_subset(&self, other: &HashSet<T, H>) -> bool { + self.iter().all(|v| other.contains(v)) + } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S>> MutableSet<T> for HashSet<T, H> { + fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) } + + fn remove(&mut self, value: &T) -> bool { self.map.remove(value) } +} + +impl<T: Eq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + try!(write!(f, "{{")); + + for (i, x) in self.iter().enumerate() { + if i != 0 { try!(write!(f, ", ")); } + try!(write!(f, "{}", *x)); + } + + write!(f, "}}") + } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S> + Default> FromIterator<T> for HashSet<T, H> { + fn from_iter<I: Iterator<T>>(iter: I) -> HashSet<T, H> { + let (lower, _) = iter.size_hint(); + let mut set = 
HashSet::with_capacity_and_hasher(lower, Default::default()); + set.extend(iter); + set + } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S> + Default> Extendable<T> for HashSet<T, H> { + fn extend<I: Iterator<T>>(&mut self, mut iter: I) { + for k in iter { + self.insert(k); + } + } +} + +impl<T: Eq + Hash<S>, S, H: Hasher<S> + Default> Default for HashSet<T, H> { + fn default() -> HashSet<T, H> { + HashSet::with_hasher(Default::default()) + } +} + +// `Repeat` is used to feed the filter closure an explicit capture +// of a reference to the other set +/// Set operations iterator +pub type SetAlgebraItems<'a, T, H> = + FilterMap<'static, (&'a HashSet<T, H>, &'a T), &'a T, + Zip<Repeat<&'a HashSet<T, H>>, SetItems<'a, T>>>; + +#[cfg(test)] +mod test_set { + use prelude::*; + + use super::HashSet; + use slice::ImmutablePartialEqSlice; + use collections::Collection; + + #[test] + fn test_disjoint() { + let mut xs = HashSet::new(); + let mut ys = HashSet::new(); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(xs.insert(5i)); + assert!(ys.insert(11i)); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(xs.insert(7)); + assert!(xs.insert(19)); + assert!(xs.insert(4)); + assert!(ys.insert(2)); + assert!(ys.insert(-11)); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(ys.insert(7)); + assert!(!xs.is_disjoint(&ys)); + assert!(!ys.is_disjoint(&xs)); + } + + #[test] + fn test_subset_and_superset() { + let mut a = HashSet::new(); + assert!(a.insert(0i)); + assert!(a.insert(5)); + assert!(a.insert(11)); + assert!(a.insert(7)); + + let mut b = HashSet::new(); + assert!(b.insert(0i)); + assert!(b.insert(7)); + assert!(b.insert(19)); + assert!(b.insert(250)); + assert!(b.insert(11)); + assert!(b.insert(200)); + + assert!(!a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(!b.is_superset(&a)); + + assert!(b.insert(5)); + + assert!(a.is_subset(&b)); + 
assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(b.is_superset(&a)); + } + + #[test] + fn test_iterate() { + let mut a = HashSet::new(); + for i in range(0u, 32) { + assert!(a.insert(i)); + } + let mut observed: u32 = 0; + for k in a.iter() { + observed |= 1 << *k; + } + assert_eq!(observed, 0xFFFF_FFFF); + } + + #[test] + fn test_intersection() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(11i)); + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(77)); + assert!(a.insert(103)); + assert!(a.insert(5)); + assert!(a.insert(-5)); + + assert!(b.insert(2i)); + assert!(b.insert(11)); + assert!(b.insert(77)); + assert!(b.insert(-9)); + assert!(b.insert(-42)); + assert!(b.insert(5)); + assert!(b.insert(3)); + + let mut i = 0; + let expected = [3, 5, 11, 77]; + for x in a.intersection(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_difference() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1i)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(3i)); + assert!(b.insert(9)); + + let mut i = 0; + let expected = [1, 5, 11]; + for x in a.difference(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_symmetric_difference() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1i)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(-2i)); + assert!(b.insert(3)); + assert!(b.insert(9)); + assert!(b.insert(14)); + assert!(b.insert(22)); + + let mut i = 0; + let expected = [-2, 1, 5, 11, 14, 22]; + for x in a.symmetric_difference(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_union() { + let mut a = HashSet::new(); + let mut b = 
HashSet::new(); + + assert!(a.insert(1i)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + assert!(a.insert(16)); + assert!(a.insert(19)); + assert!(a.insert(24)); + + assert!(b.insert(-2i)); + assert!(b.insert(1)); + assert!(b.insert(5)); + assert!(b.insert(9)); + assert!(b.insert(13)); + assert!(b.insert(19)); + + let mut i = 0; + let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24]; + for x in a.union(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_from_iter() { + let xs = [1i, 2, 3, 4, 5, 6, 7, 8, 9]; + + let set: HashSet<int> = xs.iter().map(|&x| x).collect(); + + for x in xs.iter() { + assert!(set.contains(x)); + } + } + + #[test] + fn test_move_iter() { + let hs = { + let mut hs = HashSet::new(); + + hs.insert('a'); + hs.insert('b'); + + hs + }; + + let v = hs.move_iter().collect::<Vec<char>>(); + assert!(['a', 'b'] == v.as_slice() || ['b', 'a'] == v.as_slice()); + } + + #[test] + fn test_eq() { + // These constants once happened to expose a bug in insert(). + // I'm keeping them around to prevent a regression. + let mut s1 = HashSet::new(); + + s1.insert(1i); + s1.insert(2); + s1.insert(3); + + let mut s2 = HashSet::new(); + + s2.insert(1i); + s2.insert(2); + + assert!(s1 != s2); + + s2.insert(3); + + assert_eq!(s1, s2); + } + + #[test] + fn test_show() { + let mut set: HashSet<int> = HashSet::new(); + let empty: HashSet<int> = HashSet::new(); + + set.insert(1i); + set.insert(2); + + let set_str = format!("{}", set); + + assert!(set_str == "{1, 2}".to_string() || set_str == "{2, 1}".to_string()); + assert_eq!(format!("{}", empty), "{}".to_string()); + } +} diff --git a/src/libstd/collections/hashmap/table.rs b/src/libstd/collections/hashmap/table.rs new file mode 100644 index 00000000000..96d1a9ba2fb --- /dev/null +++ b/src/libstd/collections/hashmap/table.rs @@ -0,0 +1,877 @@ +// Copyright 2014 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +// +// ignore-lexer-test FIXME #15883 + +use clone::Clone; +use cmp; +use hash::{Hash, Hasher}; +use iter::{Iterator, count}; +use mem::{min_align_of, size_of}; +use mem; +use num::{CheckedMul, is_power_of_two}; +use ops::{Deref, DerefMut, Drop}; +use option::{Some, None, Option}; +use ptr::RawPtr; +use ptr::set_memory; +use ptr::write; +use ptr; +use rt::heap::{allocate, deallocate}; + +static EMPTY_BUCKET: u64 = 0u64; + +/// The raw hashtable, providing safe-ish access to the unzipped and highly +/// optimized arrays of hashes, keys, and values. +/// +/// This design uses less memory and is a lot faster than the naive +/// `Vec<Option<u64, K, V>>`, because we don't pay for the overhead of an +/// option on every element, and we get a generally more cache-aware design. +/// +/// Key invariants of this structure: +/// +/// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have +/// 'undefined' contents. Don't read from them. This invariant is +/// enforced outside this module with the `EmptyIndex`, `FullIndex`, +/// and `SafeHash` types. +/// +/// - An `EmptyIndex` is only constructed for a bucket at an index with +/// a hash of EMPTY_BUCKET. +/// +/// - A `FullIndex` is only constructed for a bucket at an index with a +/// non-EMPTY_BUCKET hash. +/// +/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get +/// around hashes of zero by changing them to 0x8000_0000_0000_0000, +/// which will likely map to the same bucket, while not being confused +/// with "empty". 
+/// +/// - All three "arrays represented by pointers" are the same length: +/// `capacity`. This is set at creation and never changes. The arrays +/// are unzipped to save space (we don't have to pay for the padding +/// between odd sized elements, such as in a map from u64 to u8), and +/// be more cache aware (scanning through 8 hashes brings in 2 cache +/// lines, since they're all right beside each other). +/// +/// You can kind of think of this module/data structure as a safe wrapper +/// around just the "table" part of the hashtable. It enforces some +/// invariants at the type level and employs some performance trickery, +/// but in general is just a tricked out `Vec<Option<u64, K, V>>`. +/// +/// FIXME(cgaebel): +/// +/// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried, +/// isn't yet totally safe. There's a "known exploit" that you can create +/// multiple FullIndexes for a bucket, `take` one, and then still `take` +/// the other causing undefined behavior. Currently, there's no story +/// for how to protect against this statically. Therefore, there are asserts +/// on `take`, `get`, `get_mut`, and `put` which check the bucket state. +/// With time, and when we're confident this works correctly, they should +/// be removed. Also, the bounds check in `peek` is especially painful, +/// as that's called in the innermost loops of the hashtable and has the +/// potential to be a major performance drain. Remove this too. +/// +/// Or, better than remove, only enable these checks for debug builds. +/// There's currently no "debug-only" asserts in rust, so if you're reading +/// this and going "what? of course there are debug-only asserts!", then +/// please make this use them! 
+#[unsafe_no_drop_flag] +pub struct RawTable<K, V> { + capacity: uint, + size: uint, + hashes: *mut u64 +} + +/// A bucket that holds a reference to the table +pub trait BucketWithTable<M> { + /// A bucket that holds a reference to the table + fn table<'a>(&'a self) -> &'a M; + + /// Move out the reference to the table. + fn into_table(self) -> M; + + /// Get the raw index. + fn index(&self) -> uint; +} + +struct RawBucket<K, V> { + hash: *mut u64, + key: *mut K, + val: *mut V +} + +pub struct Bucket<K, V, M> { + raw: RawBucket<K, V>, + idx: uint, + table: M +} + +pub struct EmptyBucket<K, V, M> { + raw: RawBucket<K, V>, + idx: uint, + table: M +} + +pub struct FullBucket<K, V, M> { + raw: RawBucket<K, V>, + idx: uint, + table: M +} + +pub type EmptyBucketImm<'table,K,V> = EmptyBucket<K, V, &'table RawTable<K,V>>; +pub type FullBucketImm<'table,K,V> = FullBucket<K, V, &'table RawTable<K,V>>; + +pub type EmptyBucketMut<'table,K,V> = EmptyBucket<K, V, &'table mut RawTable<K,V>>; +pub type FullBucketMut<'table,K,V> = FullBucket<K, V, &'table mut RawTable<K,V>>; + +struct GapThenFull<K, V, M> { + gap: EmptyBucket<K, V, ()>, + full: FullBucket<K, V, M> +} + +impl<K, V, M: Deref<RawTable<K,V>>> GapThenFull<K, V, M> { + pub fn full<'a>(&'a self) -> &'a FullBucket<K, V, M> { + &self.full + } + + pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> { + unsafe { + *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); + mem::overwrite(self.gap.raw.key, ptr::read(self.full.raw.key as *const K)); + mem::overwrite(self.gap.raw.val, ptr::read(self.full.raw.val as *const V)); + } + + let FullBucket { raw, idx, .. 
} = self.full; + + match self.full.next().peek() { + Empty(_) => None, + Full(bucket) => { + self.gap.raw = raw; + self.gap.idx = idx; + + self.full = bucket; + self.full.idx &= self.full.table.capacity - 1; + + Some(self) + } + } + } +} + +impl<K, V> RawPtr<u64> for RawBucket<K, V> { + unsafe fn offset(self, count: int) -> RawBucket<K, V> { + RawBucket { + hash: self.hash.offset(count), + key: self.key.offset(count), + val: self.val.offset(count), + } + } + + fn null() -> RawBucket<K, V> { + RawBucket { + hash: RawPtr::null(), + key: RawPtr::null(), + val: RawPtr::null() + } + } + + fn is_null(&self) -> bool { + self.hash.is_null() + } + + fn to_uint(&self) -> uint { + self.hash.to_uint() + } + + unsafe fn to_option(&self) -> Option<&u64> { + self.hash.to_option() + } +} + +impl<K, V, M: Deref<RawTable<K,V>>> EmptyBucket<K, V, M> { + pub fn next(self) -> Bucket<K, V, M> { + let mut bucket = self.into_bucket(); + bucket.next(); + bucket + } + + pub fn into_bucket(self) -> Bucket<K, V, M> { + Bucket { + raw: self.raw, + idx: self.idx, + table: self.table + } + } + + pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> { + let gap = EmptyBucket { + raw: self.raw, + idx: self.idx, + table: () + }; + + match self.next().peek() { + Empty(_) => None, + Full(bucket) => { + Some(GapThenFull { + gap: gap, + full: bucket + }) + } + } + } +} + +impl<K, V, M: DerefMut<RawTable<K,V>>> EmptyBucket<K, V, M> { + pub fn put(mut self, hash: SafeHash, key: K, value: V) + -> FullBucket<K, V, M> { + unsafe { + *self.raw.hash = hash.inspect(); + write(self.raw.key, key); + write(self.raw.val, value); + } + + self.table.size += 1; + + FullBucket { raw: self.raw, idx: self.idx, table: self.table } + } +} + +impl<K, V, M: Deref<RawTable<K,V>>> FullBucket<K, V, M> { + pub fn next(self) -> Bucket<K, V, M> { + let mut bucket = self.into_bucket(); + bucket.next(); + bucket + } + + pub fn into_bucket(self) -> Bucket<K, V, M> { + Bucket { + raw: self.raw, + idx: self.idx, + table: self.table + 
} + } + + pub fn distance(&self) -> uint { + (self.idx - self.hash().inspect() as uint) & (self.table.capacity() - 1) + } + + pub fn hash(&self) -> SafeHash { + unsafe { + SafeHash { + hash: *self.raw.hash + } + } + } + + pub fn read<'a>(&'a self) -> (&'a K, &'a V) { + unsafe { + (&*self.raw.key, + &*self.raw.val) + } + } + + pub fn into_refs(self) -> (&K, &V) { + unsafe { + // debug_assert!(*self.raw.hash != EMPTY_BUCKET); + (&*self.raw.key, + &*self.raw.val) + } + } +} + +impl<K, V, M: DerefMut<RawTable<K,V>>> FullBucket<K, V, M> { + pub fn take(mut self) -> (EmptyBucket<K, V, M>, K, V) { + let key = self.raw.key as *const K; + let val = self.raw.val as *const V; + + self.table.size -= 1; + + unsafe { + *self.raw.hash = EMPTY_BUCKET; + ( + EmptyBucket { + raw: self.raw, + idx: self.idx, + table: self.table + }, + ptr::read(key), + ptr::read(val) + ) + } + } + + pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { + unsafe { + let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); + let old_key = ptr::replace(self.raw.key, k); + let old_val = ptr::replace(self.raw.val, v); + + (old_hash, old_key, old_val) + } + } + + pub fn read_mut<'a>(&'a self) -> (&'a mut K, &'a mut V) { + unsafe { + // debug_assert!(*self.raw.hash != EMPTY_BUCKET); + (&mut *self.raw.key, + &mut *self.raw.val) + } + } + + pub fn into_mut_refs(self) -> (&mut K, &mut V) { + unsafe { + // debug_assert!(*self.raw.hash != EMPTY_BUCKET); + (&mut *self.raw.key, + &mut *self.raw.val) + } + } +} + +impl<K, V, M: Deref<RawTable<K,V>>> Bucket<K, V, M> { + pub fn new(table: M, hash: &SafeHash) -> Bucket<K, V, M> { + let ib_index = (hash.inspect() as uint) & (table.capacity() - 1); + Bucket { + raw: unsafe { + table.as_mut_ptrs().offset(ib_index as int) + }, + idx: ib_index, + table: table + } + } + + pub fn at_index(table: M, ib_index: uint) -> Bucket<K, V, M> { + let ib_index = ib_index & (table.capacity() - 1); + Bucket { + raw: unsafe { + table.as_mut_ptrs().offset(ib_index 
as int) + }, + idx: ib_index, + table: table + } + } + + pub fn first(table: M) -> Bucket<K, V, M> { + Bucket { + raw: table.as_mut_ptrs(), + idx: 0, + table: table + } + } + + pub fn peek(self) -> BucketState<K, V, M> { + match unsafe { *self.raw.hash } { + EMPTY_BUCKET => + Empty(EmptyBucket { + raw: self.raw, + idx: self.idx, + table: self.table + }), + _ => + Full(FullBucket { + raw: self.raw, + idx: self.idx, + table: self.table + }) + } + } + + pub fn next(&mut self) { + self.idx += 1; + + let dist = if self.idx == self.table.capacity() { + -(self.table.capacity() as int - 1) + } else { + 1i + }; + + unsafe { + self.raw = self.raw.offset(dist); + } + } +} + +impl<K, V, M> BucketWithTable<M> for FullBucket<K, V, M> { + fn table<'a>(&'a self) -> &'a M { + &self.table + } + + fn into_table(self) -> M { + self.table + } + + fn index(&self) -> uint { + self.idx + } +} + +impl<K, V, M> BucketWithTable<M> for EmptyBucket<K, V, M> { + fn table<'a>(&'a self) -> &'a M { + &self.table + } + + fn into_table(self) -> M { + self.table + } + + fn index(&self) -> uint { + self.idx + } +} + +impl<K, V, M> BucketWithTable<M> for Bucket<K, V, M> { + fn table<'a>(&'a self) -> &'a M { + &self.table + } + + fn into_table(self) -> M { + self.table + } + + fn index(&self) -> uint { + self.idx + } +} + +impl<'table,K,V> Deref<RawTable<K,V>> for &'table RawTable<K,V> { + fn deref<'a>(&'a self) -> &'a RawTable<K,V> { + &**self + } +} + +impl<'table,K,V> Deref<RawTable<K,V>> for &'table mut RawTable<K,V> { + fn deref<'a>(&'a self) -> &'a RawTable<K,V> { + &**self + } +} + +impl<'table,K,V> DerefMut<RawTable<K,V>> for &'table mut RawTable<K,V> { + fn deref_mut<'a>(&'a mut self) -> &'a mut RawTable<K,V> { + &mut **self + } +} + +pub enum BucketState<K, V, M> { + Empty(EmptyBucket<K, V, M>), + Full(FullBucket<K, V, M>), +} + +/// A hash that is not zero, since we use a hash of zero to represent empty +/// buckets. 
+#[deriving(PartialEq)] +pub struct SafeHash { + hash: u64, +} + +impl SafeHash { + /// Peek at the hash value, which is guaranteed to be non-zero. + #[inline(always)] + pub fn inspect(&self) -> u64 { self.hash } +} + +/// We need to remove hashes of 0. That's reserved for empty buckets. +/// This function wraps up `hash_keyed` to be the only way outside this +/// module to generate a SafeHash. +pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash { + match hasher.hash(t) { + // This constant is exceedingly likely to hash to the same + // bucket, but it won't be counted as empty! + EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 }, + h => SafeHash { hash: h }, + } +} + +fn round_up_to_next(unrounded: uint, target_alignment: uint) -> uint { + assert!(is_power_of_two(target_alignment)); + (unrounded + target_alignment - 1) & !(target_alignment - 1) +} + +#[test] +fn test_rounding() { + assert_eq!(round_up_to_next(0, 4), 0); + assert_eq!(round_up_to_next(1, 4), 4); + assert_eq!(round_up_to_next(2, 4), 4); + assert_eq!(round_up_to_next(3, 4), 4); + assert_eq!(round_up_to_next(4, 4), 4); + assert_eq!(round_up_to_next(5, 4), 8); +} + +// Returns a tuple of (minimum required malloc alignment, hash_offset, +// key_offset, val_offset, array_size), from the start of a mallocated array. 
+fn calculate_offsets( + hash_size: uint, hash_align: uint, + keys_size: uint, keys_align: uint, + vals_size: uint, vals_align: uint) -> (uint, uint, uint, uint, uint) { + + let hash_offset = 0; + let end_of_hashes = hash_offset + hash_size; + + let keys_offset = round_up_to_next(end_of_hashes, keys_align); + let end_of_keys = keys_offset + keys_size; + + let vals_offset = round_up_to_next(end_of_keys, vals_align); + let end_of_vals = vals_offset + vals_size; + + let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align)); + + (min_align, hash_offset, keys_offset, vals_offset, end_of_vals) +} + +#[test] +fn test_offset_calculation() { + assert_eq!(calculate_offsets(128, 8, 15, 1, 4, 4 ), (8, 0, 128, 144, 148)); + assert_eq!(calculate_offsets(3, 1, 2, 1, 1, 1 ), (1, 0, 3, 5, 6)); + assert_eq!(calculate_offsets(6, 2, 12, 4, 24, 8), (8, 0, 8, 24, 48)); +} + +impl<K, V> RawTable<K, V> { + + /// Does not initialize the buckets. The caller should ensure they, + /// at the very least, set every hash to EMPTY_BUCKET. + unsafe fn new_uninitialized(capacity: uint) -> RawTable<K, V> { + if capacity == 0 { + return RawTable { + size: 0, + capacity: 0, + hashes: 0 as *mut u64, + }; + } + let hashes_size = capacity.checked_mul(&size_of::<u64>()) + .expect("capacity overflow"); + let keys_size = capacity.checked_mul(&size_of::< K >()) + .expect("capacity overflow"); + let vals_size = capacity.checked_mul(&size_of::< V >()) + .expect("capacity overflow"); + + // Allocating hashmaps is a little tricky. We need to allocate three + // arrays, but since we know their sizes and alignments up front, + // we just allocate a single array, and then have the subarrays + // point into it. + // + // This is great in theory, but in practice getting the alignment + // right is a little subtle. Therefore, calculating offsets has been + // factored out into a different function. 
+ let (malloc_alignment, hash_offset, _, _, size) = + calculate_offsets( + hashes_size, min_align_of::<u64>(), + keys_size, min_align_of::< K >(), + vals_size, min_align_of::< V >()); + + let buffer = allocate(size, malloc_alignment); + + let hashes = buffer.offset(hash_offset as int) as *mut u64; + + RawTable { + capacity: capacity, + size: 0, + hashes: hashes, + } + } + + fn as_mut_ptrs(&self) -> RawBucket<K, V> { + let hashes_size = self.capacity * size_of::<u64>(); + let keys_size = self.capacity * size_of::<K>(); + + let keys_offset = (hashes_size + min_align_of::< K >() - 1) & !(min_align_of::< K >() - 1); + let end_of_keys = keys_offset + keys_size; + + let vals_offset = (end_of_keys + min_align_of::< V >() - 1) & !(min_align_of::< V >() - 1); + + let buffer = self.hashes as *mut u8; + + unsafe { + RawBucket { + hash: self.hashes, + key: buffer.offset(keys_offset as int) as *mut K, + val: buffer.offset(vals_offset as int) as *mut V + } + } + } + + /// Creates a new raw table from a given capacity. All buckets are + /// initially empty. + #[allow(experimental)] + pub fn new(capacity: uint) -> RawTable<K, V> { + unsafe { + let ret = RawTable::new_uninitialized(capacity); + set_memory(ret.hashes, 0u8, capacity); + ret + } + } + + /// The hashtable's capacity, similar to a vector's. + pub fn capacity(&self) -> uint { + self.capacity + } + + /// The number of elements ever `put` in the hashtable, minus the number + /// of elements ever `take`n. 
+ pub fn size(&self) -> uint { + self.size + } + + fn ptrs<'a>(&'a self) -> RawBuckets<'a, K, V> { + RawBuckets { + raw: self.as_mut_ptrs(), + hashes_end: unsafe { + self.hashes.offset(self.capacity as int) + } + } + } + + pub fn iter<'a>(&'a self) -> Entries<'a, K, V> { + Entries { + iter: self.ptrs(), + elems_left: self.size(), + } + } + + pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> { + MutEntries { + iter: self.ptrs(), + elems_left: self.size(), + } + } + + pub fn move_iter(self) -> MoveEntries<K, V> { + MoveEntries { + iter: self.ptrs(), + table: self, + } + } + + pub fn rev_move_buckets<'a>(&'a mut self) -> RevMoveBuckets<'a, K, V> { + let raw_bucket = self.as_mut_ptrs(); + unsafe { + RevMoveBuckets { + raw: raw_bucket.offset(self.capacity as int), + hashes_end: raw_bucket.hash, + elems_left: self.size + } + } + } +} + +pub struct RawBuckets<'a, K, V> { + raw: RawBucket<K, V>, + hashes_end: *mut u64 +} + +impl<'a, K, V> Iterator<RawBucket<K, V>> for RawBuckets<'a, K, V> { + fn next(&mut self) -> Option<RawBucket<K, V>> { + while self.raw.hash != self.hashes_end { + unsafe { + let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); + if *prev.hash != EMPTY_BUCKET { + return Some(prev); + } + } + } + + None + } +} + +pub struct RevMoveBuckets<'a, K, V> { + raw: RawBucket<K, V>, + hashes_end: *mut u64, + elems_left: uint +} + +impl<'a, K, V> Iterator<(K, V)> for RevMoveBuckets<'a, K, V> { + fn next(&mut self) -> Option<(K, V)> { + if self.elems_left == 0 { + return None; + } + + loop { + debug_assert!(self.raw.hash != self.hashes_end); + + unsafe { + self.raw = self.raw.offset(-1); + + if *self.raw.hash != EMPTY_BUCKET { + self.elems_left -= 1; + return Some(( + ptr::read(self.raw.key as *const K), + ptr::read(self.raw.val as *const V) + )); + } + } + } + } +} + +// `read_all_mut` casts a `*u64` to a `*SafeHash`. 
Since we statically +// ensure that a `FullIndex` points to an index with a non-zero hash, +// and a `SafeHash` is just a `u64` with a different name, this is +// safe. +// +// This test ensures that a `SafeHash` really IS the same size as a +// `u64`. If you need to change the size of `SafeHash` (and +// consequently made this test fail), `read_all_mut` needs to be +// modified to no longer assume this. +#[test] +fn can_alias_safehash_as_u64() { + assert_eq!(size_of::<SafeHash>(), size_of::<u64>()) +} + +/// Note: stage0-specific version that lacks bound. +#[cfg(stage0)] +pub struct Entries<'a, K, V> { + iter: RawBuckets<'a, K, V>, + elems_left: uint, +} + +/// Iterator over shared references to entries in a table. +#[cfg(not(stage0))] +pub struct Entries<'a, K: 'a, V: 'a> { + iter: RawBuckets<'a, K, V>, + elems_left: uint, +} + +/// Note: stage0-specific version that lacks bound. +#[cfg(stage0)] +pub struct MutEntries<'a, K, V> { + iter: RawBuckets<'a, K, V>, + elems_left: uint, +} + +/// Iterator over mutable references to entries in a table. +#[cfg(not(stage0))] +pub struct MutEntries<'a, K: 'a, V: 'a> { + iter: RawBuckets<'a, K, V>, + elems_left: uint, +} + +/// Iterator over the entries in a table, consuming the table. 
+pub struct MoveEntries<K, V> { + table: RawTable<K, V>, + iter: RawBuckets<'static, K, V> +} + +impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.iter.next().map(|bucket| { + self.elems_left -= 1; + unsafe { + (&*bucket.key, + &*bucket.val) + } + }) + } + + fn size_hint(&self) -> (uint, Option<uint>) { + (self.elems_left, Some(self.elems_left)) + } +} + +impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.iter.next().map(|bucket| { + self.elems_left -= 1; + unsafe { + (&*bucket.key, + &mut *bucket.val) + } + }) + } + + fn size_hint(&self) -> (uint, Option<uint>) { + (self.elems_left, Some(self.elems_left)) + } +} + +impl<K, V> Iterator<(SafeHash, K, V)> for MoveEntries<K, V> { + fn next(&mut self) -> Option<(SafeHash, K, V)> { + self.iter.next().map(|bucket| { + self.table.size -= 1; + unsafe { + ( + SafeHash { + hash: *bucket.hash, + }, + ptr::read(bucket.key as *const K), + ptr::read(bucket.val as *const V) + ) + } + }) + } + + fn size_hint(&self) -> (uint, Option<uint>) { + let size = self.table.size(); + (size, Some(size)) + } +} + +impl<K: Clone, V: Clone> Clone for RawTable<K, V> { + fn clone(&self) -> RawTable<K, V> { + unsafe { + let mut new_ht = RawTable::new_uninitialized(self.capacity()); + + { + let cap = self.capacity(); + let mut new_buckets = Bucket::first(&mut new_ht); + let mut buckets = Bucket::first(self); + while buckets.index() != cap { + match buckets.peek() { + Full(full) => { + let (h, k, v) = { + let (k, v) = full.read(); + (full.hash(), k.clone(), v.clone()) + }; + *new_buckets.raw.hash = h.inspect(); + mem::overwrite(new_buckets.raw.key, k); + mem::overwrite(new_buckets.raw.val, v); + } + _ => { + *new_buckets.raw.hash = EMPTY_BUCKET; + } + } + new_buckets.next(); + buckets.next(); + } + } + + new_ht.size = self.size(); + + new_ht + } + } +} + +#[unsafe_destructor] +impl<K, V> Drop for 
RawTable<K, V> { + fn drop(&mut self) { + if self.hashes.is_null() { + return; + } + // This is in reverse because we're likely to have partially taken + // some elements out with `.move_iter()` from the front. + // Check if the size is 0, so we don't do a useless scan when + // dropping empty tables such as on resize. + // Avoid double free of elements already moved out. + for _ in self.rev_move_buckets() {} + + let hashes_size = self.capacity * size_of::<u64>(); + let keys_size = self.capacity * size_of::<K>(); + let vals_size = self.capacity * size_of::<V>(); + let (align, _, _, _, size) = calculate_offsets(hashes_size, min_align_of::<u64>(), + keys_size, min_align_of::<K>(), + vals_size, min_align_of::<V>()); + + unsafe { + deallocate(self.hashes as *mut u8, size, align); + // Remember how everything was allocated out of one buffer + // during initialization? We only need one call to free here. + } + + self.hashes = RawPtr::null(); + } +} |
