From 112c8a966fbdb52ff2a535dc8e6df3a8b3cb8fb2 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Thu, 30 Oct 2014 21:25:08 -0400 Subject: refactor libcollections as part of collection reform * Moves multi-collection files into their own directory, and splits them into separate files * Changes exports so that each collection has its own module * Adds underscores to public modules and filenames to match standard naming conventions (that is, treemap::{TreeMap, TreeSet} => tree_map::TreeMap, tree_set::TreeSet) * Renames PriorityQueue to BinaryHeap * Renames SmallIntMap to VecMap * Miscellaneous fallout fixes [breaking-change] --- src/libstd/collections/hash/bench.rs | 130 ++ src/libstd/collections/hash/map.rs | 2133 +++++++++++++++++++++++++++++++ src/libstd/collections/hash/mod.rs | 16 + src/libstd/collections/hash/set.rs | 834 ++++++++++++ src/libstd/collections/hash/table.rs | 907 +++++++++++++ src/libstd/collections/hashmap/bench.rs | 130 -- src/libstd/collections/hashmap/map.rs | 2133 ------------------------------- src/libstd/collections/hashmap/mod.rs | 33 - src/libstd/collections/hashmap/set.rs | 834 ------------ src/libstd/collections/hashmap/table.rs | 907 ------------- src/libstd/collections/mod.rs | 41 +- 11 files changed, 4047 insertions(+), 4051 deletions(-) create mode 100644 src/libstd/collections/hash/bench.rs create mode 100644 src/libstd/collections/hash/map.rs create mode 100644 src/libstd/collections/hash/mod.rs create mode 100644 src/libstd/collections/hash/set.rs create mode 100644 src/libstd/collections/hash/table.rs delete mode 100644 src/libstd/collections/hashmap/bench.rs delete mode 100644 src/libstd/collections/hashmap/map.rs delete mode 100644 src/libstd/collections/hashmap/mod.rs delete mode 100644 src/libstd/collections/hashmap/set.rs delete mode 100644 src/libstd/collections/hashmap/table.rs (limited to 'src/libstd/collections') diff --git a/src/libstd/collections/hash/bench.rs b/src/libstd/collections/hash/bench.rs new file mode 
100644 index 00000000000..62b93336a34 --- /dev/null +++ b/src/libstd/collections/hash/bench.rs @@ -0,0 +1,130 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![cfg(test)] + +extern crate test; +use prelude::*; + +use self::test::Bencher; +use iter::{range_inclusive}; + +#[bench] +fn new_drop(b : &mut Bencher) { + use super::map::HashMap; + + b.iter(|| { + let m : HashMap = HashMap::new(); + assert_eq!(m.len(), 0); + }) +} + +#[bench] +fn new_insert_drop(b : &mut Bencher) { + use super::map::HashMap; + + b.iter(|| { + let mut m = HashMap::new(); + m.insert(0i, 0i); + assert_eq!(m.len(), 1); + }) +} + +#[bench] +fn grow_by_insertion(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + let mut k = 1001; + + b.iter(|| { + m.insert(k, k); + k += 1; + }); +} + +#[bench] +fn find_existing(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + b.iter(|| { + for i in range_inclusive(1i, 1000) { + m.contains_key(&i); + } + }); +} + +#[bench] +fn find_nonexisting(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + b.iter(|| { + for i in range_inclusive(1001i, 2000) { + m.contains_key(&i); + } + }); +} + +#[bench] +fn hashmap_as_queue(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + let mut k = 1i; + + b.iter(|| { + m.pop(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }); +} + +#[bench] +fn 
find_pop_insert(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1i, 1000) { + m.insert(i, i); + } + + let mut k = 1i; + + b.iter(|| { + m.find(&(k + 400)); + m.find(&(k + 2000)); + m.pop(&k); + m.insert(k + 1000, k + 1000); + k += 1; + }) +} diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs new file mode 100644 index 00000000000..596e483c2f6 --- /dev/null +++ b/src/libstd/collections/hash/map.rs @@ -0,0 +1,2133 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +// +// ignore-lexer-test FIXME #15883 + +use clone::Clone; +use cmp::{max, Eq, Equiv, PartialEq}; +use default::Default; +use fmt::{mod, Show}; +use hash::{Hash, Hasher, RandomSipHasher}; +use iter::{mod, Iterator, FromIterator, Extendable}; +use kinds::Sized; +use mem::{mod, replace}; +use num; +use ops::{Deref, Index, IndexMut}; +use option::{Some, None, Option}; +use result::{Result, Ok, Err}; + +use super::table; +use super::table::{ + Bucket, + Empty, + EmptyBucket, + Full, + FullBucket, + FullBucketImm, + FullBucketMut, + RawTable, + SafeHash +}; + +const INITIAL_LOG2_CAP: uint = 5; +pub const INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5 + +/// The default behavior of HashMap implements a load factor of 90.9%. +/// This behavior is characterized by the following conditions: +/// +/// - if size > 0.909 * capacity: grow +/// - if size < 0.25 * capacity: shrink (if this won't bring capacity lower +/// than the minimum) +#[deriving(Clone)] +struct DefaultResizePolicy { + /// Doubled minimal capacity. The capacity must never drop below + /// the minimum capacity. 
(The check happens before the capacity + /// is potentially halved.) + minimum_capacity2: uint +} + +impl DefaultResizePolicy { + fn new(new_capacity: uint) -> DefaultResizePolicy { + DefaultResizePolicy { + minimum_capacity2: new_capacity << 1 + } + } + + #[inline] + fn capacity_range(&self, new_size: uint) -> (uint, uint) { + // Here, we are rephrasing the logic by specifying the ranges: + // + // - if `size * 1.1 < cap < size * 4`: don't resize + // - if `cap < minimum_capacity * 2`: don't shrink + // - otherwise, resize accordingly + ((new_size * 11) / 10, max(new_size << 2, self.minimum_capacity2)) + } + + #[inline] + fn reserve(&mut self, new_capacity: uint) { + self.minimum_capacity2 = new_capacity << 1; + } +} + +// The main performance trick in this hashmap is called Robin Hood Hashing. +// It gains its excellent performance from one essential operation: +// +// If an insertion collides with an existing element, and that element's +// "probe distance" (how far away the element is from its ideal location) +// is higher than how far we've already probed, swap the elements. +// +// This massively lowers variance in probe distance, and allows us to get very +// high load factors with good performance. The 90% load factor I use is rather +// conservative. +// +// > Why a load factor of approximately 90%? +// +// In general, all the distances to initial buckets will converge on the mean. +// At a load factor of α, the odds of finding the target bucket after k +// probes is approximately 1-α^k. If we set this equal to 50% (since we converge +// on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round +// this down to make the math easier on the CPU and avoid its FPU. +// Since on average we start the probing in the middle of a cache line, this +// strategy pulls in two cache lines of hashes on every lookup. 
I think that's +// pretty good, but if you want to trade off some space, it could go down to one +// cache line on average with an α of 0.84. +// +// > Wait, what? Where did you get 1-α^k from? +// +// On the first probe, your odds of a collision with an existing element is α. +// The odds of doing this twice in a row is approximately α^2. For three times, +// α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT +// colliding after k tries is 1-α^k. +// +// The paper from 1986 cited below mentions an implementation which keeps track +// of the distance-to-initial-bucket histogram. This approach is not suitable +// for modern architectures because it requires maintaining an internal data +// structure. This allows very good first guesses, but we are most concerned +// with guessing entire cache lines, not individual indexes. Furthermore, array +// accesses are no longer linear and in one direction, as we have now. There +// is also memory and cache pressure that this would entail that would be very +// difficult to properly see in a microbenchmark. +// +// ## Future Improvements (FIXME!) +// +// Allow the load factor to be changed dynamically and/or at initialization. +// +// Also, would it be possible for us to reuse storage when growing the +// underlying table? This is exactly the use case for 'realloc', and may +// be worth exploring. +// +// ## Future Optimizations (FIXME!) +// +// Another possible design choice that I made without any real reason is +// parameterizing the raw table over keys and values. Technically, all we need +// is the size and alignment of keys and values, and the code should be just as +// efficient (well, we might need one for power-of-two size and one for not...). +// This has the potential to reduce code bloat in rust executables, without +// really losing anything except 4 words (key size, key alignment, val size, +// val alignment) which can be passed in to every call of a `RawTable` function. 
+// This would definitely be an avenue worth exploring if people start complaining +// about the size of rust executables. +// +// Annotate exceedingly likely branches in `table::make_hash` +// and `search_hashed_generic` to reduce instruction cache pressure +// and mispredictions once it becomes possible (blocked on issue #11092). +// +// Shrinking the table could simply reallocate in place after moving buckets +// to the first half. +// +// The growth algorithm (fragment of the Proof of Correctness) +// -------------------- +// +// The growth algorithm is basically a fast path of the naive reinsertion- +// during-resize algorithm. Other paths should never be taken. +// +// Consider growing a robin hood hashtable of capacity n. Normally, we do this +// by allocating a new table of capacity `2n`, and then individually reinserting +// each element in the old table into the new one. This guarantees that the +// new table is a valid robin hood hashtable with all the desired statistical +// properties. Remark that the order we reinsert the elements in should not +// matter. For simplicity and efficiency, we will consider only linear +// reinsertions, which consist of reinserting all elements in the old table +// into the new one by increasing order of index. However we will not be +// starting our reinsertions from index 0 in general. If we start from index +// i, for the purpose of reinsertion we will consider all elements with real +// index j < i to have virtual index n + j. +// +// Our hash generation scheme consists of generating a 64-bit hash and +// truncating the most significant bits. When moving to the new table, we +// simply introduce a new bit to the front of the hash. Therefore, if an +// element has ideal index i in the old table, it can have one of two ideal +// locations in the new table. If the new bit is 0, then the new ideal index +// is i. If the new bit is 1, then the new ideal index is n + i. 
Intuitively, +// we are producing two independent tables of size n, and for each element we +// independently choose which table to insert it into with equal probability. +// However, rather than wrapping around themselves on overflowing their +// indexes, the first table overflows into the second, and the second into the +// first. Visually, our new table will look something like: +// +// [yy_xxx_xxxx_xxx|xx_yyy_yyyy_yyy] +// +// Where x's are elements inserted into the first table, y's are elements +// inserted into the second, and _'s are empty sections. We now define a few +// key concepts that we will use later. Note that this is a very abstract +// perspective of the table. A real resized table would be at least half +// empty. +// +// Theorem: A linear robin hood reinsertion from the first ideal element +// produces identical results to a linear naive reinsertion from the same +// element. +// +// FIXME(Gankro, pczarn): review the proof and put it all in a separate doc.rs + +/// A hash map implementation which uses linear probing with Robin +/// Hood bucket stealing. +/// +/// The hashes are all keyed by the task-local random number generator +/// on creation by default. This means that the ordering of the keys is +/// randomized, but makes the tables more resistant to +/// denial-of-service attacks (Hash DoS). This behaviour can be +/// overridden with one of the constructors. +/// +/// It is required that the keys implement the `Eq` and `Hash` traits, although +/// this can frequently be achieved by using `#[deriving(Eq, Hash)]`. +/// +/// Relevant papers/articles: +/// +/// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf) +/// 2. Emmanuel Goossaert. ["Robin Hood +/// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/) +/// 3. Emmanuel Goossaert. 
["Robin Hood hashing: backward shift +/// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/) +/// +/// # Example +/// +/// ``` +/// use std::collections::HashMap; +/// +/// // type inference lets us omit an explicit type signature (which +/// // would be `HashMap<&str, &str>` in this example). +/// let mut book_reviews = HashMap::new(); +/// +/// // review some books. +/// book_reviews.insert("Adventures of Huckleberry Finn", "My favorite book."); +/// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece."); +/// book_reviews.insert("Pride and Prejudice", "Very enjoyable."); +/// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot."); +/// +/// // check for a specific one. +/// if !book_reviews.contains_key(&("Les Misérables")) { +/// println!("We've got {} reviews, but Les Misérables ain't one.", +/// book_reviews.len()); +/// } +/// +/// // oops, this review has a lot of spelling mistakes, let's delete it. +/// book_reviews.remove(&("The Adventures of Sherlock Holmes")); +/// +/// // look up the values associated with some keys. +/// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"]; +/// for book in to_find.iter() { +/// match book_reviews.find(book) { +/// Some(review) => println!("{}: {}", *book, *review), +/// None => println!("{} is unreviewed.", *book) +/// } +/// } +/// +/// // iterate over everything. +/// for (book, review) in book_reviews.iter() { +/// println!("{}: \"{}\"", *book, *review); +/// } +/// ``` +/// +/// The easiest way to use `HashMap` with a custom type is to derive `Eq` and `Hash`. +/// We must also derive `PartialEq`. 
+/// +/// ``` +/// use std::collections::HashMap; +/// +/// #[deriving(Hash, Eq, PartialEq, Show)] +/// struct Viking<'a> { +/// name: &'a str, +/// power: uint, +/// } +/// +/// let mut vikings = HashMap::new(); +/// +/// vikings.insert("Norway", Viking { name: "Einar", power: 9u }); +/// vikings.insert("Denmark", Viking { name: "Olaf", power: 4u }); +/// vikings.insert("Iceland", Viking { name: "Harald", power: 8u }); +/// +/// // Use derived implementation to print the vikings. +/// for (land, viking) in vikings.iter() { +/// println!("{} at {}", viking, land); +/// } +/// ``` +#[deriving(Clone)] +pub struct HashMap { + // All hashes are keyed on these values, to prevent hash collision attacks. + hasher: H, + + table: RawTable, + + // We keep this at the end since it might as well have tail padding. + resize_policy: DefaultResizePolicy, +} + +/// Search for a pre-hashed key. +fn search_hashed_generic>>(table: M, + hash: &SafeHash, + is_match: |&K| -> bool) + -> SearchResult { + let size = table.size(); + let mut probe = Bucket::new(table, hash); + let ib = probe.index(); + + while probe.index() != ib + size { + let full = match probe.peek() { + Empty(b) => return TableRef(b.into_table()), // hit an empty bucket + Full(b) => b + }; + + if full.distance() + ib < full.index() { + // We can finish the search early if we hit any bucket + // with a lower distance to initial bucket than we've probed. + return TableRef(full.into_table()); + } + + // If the hash doesn't match, it can't be this one.. + if *hash == full.hash() { + let matched = { + let (k, _) = full.read(); + is_match(k) + }; + + // If the key doesn't match, it can't be this one.. 
+ if matched { + return FoundExisting(full); + } + } + + probe = full.next(); + } + + TableRef(probe.into_table()) +} + +fn search_hashed>>(table: M, hash: &SafeHash, k: &K) + -> SearchResult { + search_hashed_generic(table, hash, |k_| *k == *k_) +} + +fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { + let (empty, retkey, retval) = starting_bucket.take(); + let mut gap = match empty.gap_peek() { + Some(b) => b, + None => return (retkey, retval) + }; + + while gap.full().distance() != 0 { + gap = match gap.shift() { + Some(b) => b, + None => break + }; + } + + // Now we've done all our shifting. Return the value we grabbed earlier. + return (retkey, retval); +} + +/// Perform robin hood bucket stealing at the given `bucket`. You must +/// also pass the position of that bucket's initial bucket so we don't have +/// to recalculate it. +/// +/// `hash`, `k`, and `v` are the elements to "robin hood" into the hashtable. +fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>, + mut ib: uint, + mut hash: SafeHash, + mut k: K, + mut v: V) + -> &'a mut V { + let starting_index = bucket.index(); + let size = { + let table = bucket.table(); // FIXME "lifetime too short". + table.size() + }; + // There can be at most `size - dib` buckets to displace, because + // in the worst case, there are `size` elements and we already are + // `distance` buckets away from the initial one. + let idx_end = starting_index + size - bucket.distance(); + + loop { + let (old_hash, old_key, old_val) = bucket.replace(hash, k, v); + loop { + let probe = bucket.next(); + assert!(probe.index() != idx_end); + + let full_bucket = match probe.peek() { + table::Empty(bucket) => { + // Found a hole! + let b = bucket.put(old_hash, old_key, old_val); + // Now that it's stolen, just read the value's pointer + // right out of the table! 
+ let (_, v) = Bucket::at_index(b.into_table(), starting_index).peek() + .expect_full() + .into_mut_refs(); + return v; + }, + table::Full(bucket) => bucket + }; + + let probe_ib = full_bucket.index() - full_bucket.distance(); + + bucket = full_bucket; + + // Robin hood! Steal the spot. + if ib < probe_ib { + ib = probe_ib; + hash = old_hash; + k = old_key; + v = old_val; + break; + } + } + } +} + +/// A result that works like Option> but preserves +/// the reference that grants us access to the table in any case. +enum SearchResult { + // This is an entry that holds the given key: + FoundExisting(FullBucket), + + // There was no such entry. The reference is given back: + TableRef(M) +} + +impl SearchResult { + fn into_option(self) -> Option> { + match self { + FoundExisting(bucket) => Some(bucket), + TableRef(_) => None + } + } +} + +impl, V, S, H: Hasher> HashMap { + fn make_hash>(&self, x: &X) -> SafeHash { + table::make_hash(&self.hasher, x) + } + + fn search_equiv<'a, Sized? Q: Hash + Equiv>(&'a self, q: &Q) + -> Option> { + let hash = self.make_hash(q); + search_hashed_generic(&self.table, &hash, |k| q.equiv(k)).into_option() + } + + fn search_equiv_mut<'a, Sized? Q: Hash + Equiv>(&'a mut self, q: &Q) + -> Option> { + let hash = self.make_hash(q); + search_hashed_generic(&mut self.table, &hash, |k| q.equiv(k)).into_option() + } + + /// Search for a key, yielding the index if it's found in the hashtable. + /// If you already have the hash for the key lying around, use + /// search_hashed. + fn search<'a>(&'a self, k: &K) -> Option> { + let hash = self.make_hash(k); + search_hashed(&self.table, &hash, k).into_option() + } + + fn search_mut<'a>(&'a mut self, k: &K) -> Option> { + let hash = self.make_hash(k); + search_hashed(&mut self.table, &hash, k).into_option() + } + + // The caller should ensure that invariants by Robin Hood Hashing hold. 
+ fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { + let cap = self.table.capacity(); + let mut buckets = Bucket::new(&mut self.table, &hash); + let ib = buckets.index(); + + while buckets.index() != ib + cap { + // We don't need to compare hashes for value swap. + // Not even DIBs for Robin Hood. + buckets = match buckets.peek() { + Empty(empty) => { + empty.put(hash, k, v); + return; + } + Full(b) => b.into_bucket() + }; + buckets.next(); + } + panic!("Internal HashMap error: Out of space."); + } +} + +impl HashMap { + /// Create an empty HashMap. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// let mut map: HashMap<&str, int> = HashMap::with_capacity(10); + /// ``` + #[inline] + pub fn new() -> HashMap { + let hasher = RandomSipHasher::new(); + HashMap::with_hasher(hasher) + } + + /// Creates an empty hash map with the given initial capacity. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// let mut map: HashMap<&str, int> = HashMap::with_capacity(10); + /// ``` + #[inline] + pub fn with_capacity(capacity: uint) -> HashMap { + let hasher = RandomSipHasher::new(); + HashMap::with_capacity_and_hasher(capacity, hasher) + } +} + +impl, V, S, H: Hasher> HashMap { + /// Creates an empty hashmap which will use the given hasher to hash keys. + /// + /// The creates map has the default initial capacity. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// use std::hash::sip::SipHasher; + /// + /// let h = SipHasher::new(); + /// let mut map = HashMap::with_hasher(h); + /// map.insert(1i, 2u); + /// ``` + #[inline] + pub fn with_hasher(hasher: H) -> HashMap { + HashMap { + hasher: hasher, + resize_policy: DefaultResizePolicy::new(INITIAL_CAPACITY), + table: RawTable::new(0), + } + } + + /// Create an empty HashMap with space for at least `capacity` + /// elements, using `hasher` to hash the keys. 
+ /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow HashMaps to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// use std::hash::sip::SipHasher; + /// + /// let h = SipHasher::new(); + /// let mut map = HashMap::with_capacity_and_hasher(10, h); + /// map.insert(1i, 2u); + /// ``` + #[inline] + pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap { + let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity)); + HashMap { + hasher: hasher, + resize_policy: DefaultResizePolicy::new(cap), + table: RawTable::new(cap), + } + } + + /// The hashtable will never try to shrink below this size. You can use + /// this function to reduce reallocations if your hashtable frequently + /// grows and shrinks by large amounts. + /// + /// This function has no effect on the operational semantics of the + /// hashtable, only on performance. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// let mut map: HashMap<&str, int> = HashMap::new(); + /// map.reserve(10); + /// ``` + pub fn reserve(&mut self, new_minimum_capacity: uint) { + let cap = num::next_power_of_two( + max(INITIAL_CAPACITY, new_minimum_capacity)); + + self.resize_policy.reserve(cap); + + if self.table.capacity() < cap { + self.resize(cap); + } + } + + /// Resizes the internal vectors to a new capacity. It's your responsibility to: + /// 1) Make sure the new capacity is enough for all the elements, accounting + /// for the load factor. + /// 2) Ensure new_capacity is a power of two. 
+ fn resize(&mut self, new_capacity: uint) { + assert!(self.table.size() <= new_capacity); + assert!(num::is_power_of_two(new_capacity)); + + let mut old_table = replace(&mut self.table, RawTable::new(new_capacity)); + let old_size = old_table.size(); + + if old_table.capacity() == 0 || old_table.size() == 0 { + return; + } + + if new_capacity < old_table.capacity() { + // Shrink the table. Naive algorithm for resizing: + for (h, k, v) in old_table.into_iter() { + self.insert_hashed_nocheck(h, k, v); + } + } else { + // Grow the table. + // Specialization of the other branch. + let mut bucket = Bucket::first(&mut old_table); + + // "So a few of the first shall be last: for many be called, + // but few chosen." + // + // We'll most likely encounter a few buckets at the beginning that + // have their initial buckets near the end of the table. They were + // placed at the beginning as the probe wrapped around the table + // during insertion. We must skip forward to a bucket that won't + // get reinserted too early and won't unfairly steal others spot. + // This eliminates the need for robin hood. + loop { + bucket = match bucket.peek() { + Full(full) => { + if full.distance() == 0 { + // This bucket occupies its ideal spot. + // It indicates the start of another "cluster". + bucket = full.into_bucket(); + break; + } + // Leaving this bucket in the last cluster for later. + full.into_bucket() + } + Empty(b) => { + // Encountered a hole between clusters. 
+ b.into_bucket() + } + }; + bucket.next(); + } + + // This is how the buckets might be laid out in memory: + // ($ marks an initialized bucket) + // ________________ + // |$$$_$$$$$$_$$$$$| + // + // But we've skipped the entire initial cluster of buckets + // and will continue iteration in this order: + // ________________ + // |$$$$$$_$$$$$ + // ^ wrap around once end is reached + // ________________ + // $$$_____________| + // ^ exit once table.size == 0 + loop { + bucket = match bucket.peek() { + Full(bucket) => { + let h = bucket.hash(); + let (b, k, v) = bucket.take(); + self.insert_hashed_ordered(h, k, v); + { + let t = b.table(); // FIXME "lifetime too short". + if t.size() == 0 { break } + }; + b.into_bucket() + } + Empty(b) => b.into_bucket() + }; + bucket.next(); + } + } + + assert_eq!(self.table.size(), old_size); + } + + /// Performs any necessary resize operations, such that there's space for + /// new_size elements. + fn make_some_room(&mut self, new_size: uint) { + let (grow_at, shrink_at) = self.resize_policy.capacity_range(new_size); + let cap = self.table.capacity(); + + // An invalid value shouldn't make us run out of space. + debug_assert!(grow_at >= new_size); + + if cap <= grow_at { + let new_capacity = max(cap << 1, INITIAL_CAPACITY); + self.resize(new_capacity); + } else if shrink_at <= cap { + let new_capacity = cap >> 1; + self.resize(new_capacity); + } + } + + /// Insert a pre-hashed key-value pair, without first checking + /// that there's enough room in the buckets. Returns a reference to the + /// newly insert value. + /// + /// If the key already exists, the hashtable will be returned untouched + /// and a reference to the existing element will be returned. 
+ fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> &mut V { + self.insert_or_replace_with(hash, k, v, |_, _, _| ()) + } + + fn insert_or_replace_with<'a>(&'a mut self, + hash: SafeHash, + k: K, + v: V, + found_existing: |&mut K, &mut V, V|) + -> &'a mut V { + // Worst case, we'll find one empty bucket among `size + 1` buckets. + let size = self.table.size(); + let mut probe = Bucket::new(&mut self.table, &hash); + let ib = probe.index(); + + loop { + let mut bucket = match probe.peek() { + Empty(bucket) => { + // Found a hole! + let bucket = bucket.put(hash, k, v); + let (_, val) = bucket.into_mut_refs(); + return val; + }, + Full(bucket) => bucket + }; + + if bucket.hash() == hash { + let found_match = { + let (bucket_k, _) = bucket.read_mut(); + k == *bucket_k + }; + if found_match { + let (bucket_k, bucket_v) = bucket.into_mut_refs(); + debug_assert!(k == *bucket_k); + // Key already exists. Get its reference. + found_existing(bucket_k, bucket_v, v); + return bucket_v; + } + } + + let robin_ib = bucket.index() as int - bucket.distance() as int; + + if (ib as int) < robin_ib { + // Found a luckier bucket than me. Better steal his spot. + return robin_hood(bucket, robin_ib as uint, hash, k, v); + } + + probe = bucket.next(); + assert!(probe.index() != ib + size + 1); + } + } + + /// Retrieves a mutable value for the given key. + /// See [`find_mut`](../trait.MutableMap.html#tymethod.find_mut) for a non-panicking + /// alternative. + /// + /// # Failure + /// + /// Fails if the key is not present. 
+ /// + /// # Example + /// + /// ``` + /// # #![allow(deprecated)] + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1i); + /// { + /// // val will freeze map to prevent usage during its lifetime + /// let val = map.get_mut(&"a"); + /// *val = 40; + /// } + /// assert_eq!(map["a"], 40); + /// + /// // A more direct way could be: + /// *map.get_mut(&"a") = -2; + /// assert_eq!(map["a"], -2); + /// ``` + #[deprecated = "use indexing instead: `&mut map[key]`"] + pub fn get_mut<'a>(&'a mut self, k: &K) -> &'a mut V { + &mut self[*k] + } + + /// Return true if the map contains a value for the specified key, + /// using equivalence. + /// + /// See [pop_equiv](#method.pop_equiv) for an extended example. + pub fn contains_key_equiv + Equiv>(&self, key: &Q) -> bool { + self.search_equiv(key).is_some() + } + + /// Return the value corresponding to the key in the map, using + /// equivalence. + /// + /// See [pop_equiv](#method.pop_equiv) for an extended example. + pub fn find_equiv<'a, Sized? Q: Hash + Equiv>(&'a self, k: &Q) -> Option<&'a V> { + match self.search_equiv(k) { + None => None, + Some(bucket) => { + let (_, v_ref) = bucket.into_refs(); + Some(v_ref) + } + } + } + + /// Remove an equivalent key from the map, returning the value at the + /// key if the key was previously in the map. + /// + /// # Example + /// + /// This is a slightly silly example where we define the number's + /// parity as the equivalence class. It is important that the + /// values hash the same, which is why we implement `Hash`. 
+ /// + /// ``` + /// use std::collections::HashMap; + /// use std::hash::Hash; + /// use std::hash::sip::SipState; + /// + /// #[deriving(Eq, PartialEq)] + /// struct EvenOrOdd { + /// num: uint + /// }; + /// + /// impl Hash for EvenOrOdd { + /// fn hash(&self, state: &mut SipState) { + /// let parity = self.num % 2; + /// parity.hash(state); + /// } + /// } + /// + /// impl Equiv for EvenOrOdd { + /// fn equiv(&self, other: &EvenOrOdd) -> bool { + /// self.num % 2 == other.num % 2 + /// } + /// } + /// + /// let mut map = HashMap::new(); + /// map.insert(EvenOrOdd { num: 3 }, "foo"); + /// + /// assert!(map.contains_key_equiv(&EvenOrOdd { num: 1 })); + /// assert!(!map.contains_key_equiv(&EvenOrOdd { num: 4 })); + /// + /// assert_eq!(map.find_equiv(&EvenOrOdd { num: 5 }), Some(&"foo")); + /// assert_eq!(map.find_equiv(&EvenOrOdd { num: 2 }), None); + /// + /// assert_eq!(map.pop_equiv(&EvenOrOdd { num: 1 }), Some("foo")); + /// assert_eq!(map.pop_equiv(&EvenOrOdd { num: 2 }), None); + /// + /// ``` + #[experimental] + pub fn pop_equiv + Equiv>(&mut self, k: &Q) -> Option { + if self.table.size() == 0 { + return None + } + + let potential_new_size = self.table.size() - 1; + self.make_some_room(potential_new_size); + + match self.search_equiv_mut(k) { + Some(bucket) => { + let (_k, val) = pop_internal(bucket); + Some(val) + } + _ => None + } + } + + /// An iterator visiting all keys in arbitrary order. + /// Iterator element type is `&'a K`. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1i); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for key in map.keys() { + /// println!("{}", key); + /// } + /// ``` + pub fn keys(&self) -> Keys { + self.iter().map(|(k, _v)| k) + } + + /// An iterator visiting all values in arbitrary order. + /// Iterator element type is `&'a V`. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1i); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for key in map.values() { + /// println!("{}", key); + /// } + /// ``` + pub fn values(&self) -> Values { + self.iter().map(|(_k, v)| v) + } + + /// An iterator visiting all key-value pairs in arbitrary order. + /// Iterator element type is `(&'a K, &'a V)`. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1i); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for (key, val) in map.iter() { + /// println!("key: {} val: {}", key, val); + /// } + /// ``` + pub fn iter(&self) -> Entries { + Entries { inner: self.table.iter() } + } + + /// An iterator visiting all key-value pairs in arbitrary order, + /// with mutable references to the values. + /// Iterator element type is `(&'a K, &'a mut V)`. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1i); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// // Update all values + /// for (_, val) in map.iter_mut() { + /// *val *= 2; + /// } + /// + /// for (key, val) in map.iter() { + /// println!("key: {} val: {}", key, val); + /// } + /// ``` + pub fn iter_mut(&mut self) -> MutEntries { + MutEntries { inner: self.table.iter_mut() } + } + + /// Creates a consuming iterator, that is, one that moves each key-value + /// pair out of the map in arbitrary order. The map cannot be used after + /// calling this. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1i); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// // Not possible with .iter() + /// let vec: Vec<(&str, int)> = map.into_iter().collect(); + /// ``` + pub fn into_iter(self) -> MoveEntries { + MoveEntries { + inner: self.table.into_iter().map(|(_, k, v)| (k, v)) + } + } + + /// Gets the given key's corresponding entry in the map for in-place manipulation + pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> { + // Gotta resize now, and we don't know which direction, so try both? + let size = self.table.size(); + self.make_some_room(size + 1); + if size > 0 { + self.make_some_room(size - 1); + } + + let hash = self.make_hash(&key); + search_entry_hashed(&mut self.table, hash, key) + } + + /// Return the number of elements in the map. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// assert_eq!(a.len(), 0); + /// a.insert(1u, "a"); + /// assert_eq!(a.len(), 1); + /// ``` + pub fn len(&self) -> uint { self.table.size() } + + /// Return true if the map contains no elements. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// assert!(a.is_empty()); + /// a.insert(1u, "a"); + /// assert!(!a.is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { self.len() == 0 } + + /// Clears the map, removing all key-value pairs. Keeps the allocated memory + /// for reuse. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// a.insert(1u, "a"); + /// a.clear(); + /// assert!(a.is_empty()); + /// ``` + pub fn clear(&mut self) { + // Prevent reallocations from happening from now on. Makes it possible + // for the map to be reused but has a downside: reserves permanently. 
+ self.resize_policy.reserve(self.table.size()); + + let cap = self.table.capacity(); + let mut buckets = Bucket::first(&mut self.table); + + while buckets.index() != cap { + buckets = match buckets.peek() { + Empty(b) => b.next(), + Full(full) => { + let (b, _, _) = full.take(); + b.next() + } + }; + } + } + + /// Returns a reference to the value corresponding to the key. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1u, "a"); + /// assert_eq!(map.find(&1), Some(&"a")); + /// assert_eq!(map.find(&2), None); + /// ``` + pub fn find<'a>(&'a self, k: &K) -> Option<&'a V> { + self.search(k).map(|bucket| { + let (_, v) = bucket.into_refs(); + v + }) + } + + /// Returns true if the map contains a value for the specified key. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1u, "a"); + /// assert_eq!(map.contains_key(&1), true); + /// assert_eq!(map.contains_key(&2), false); + /// ``` + pub fn contains_key(&self, k: &K) -> bool { + self.search(k).is_some() + } + + /// Returns a mutable reference to the value corresponding to the key. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1u, "a"); + /// match map.find_mut(&1) { + /// Some(x) => *x = "b", + /// None => (), + /// } + /// assert_eq!(map[1], "b"); + /// ``` + pub fn find_mut<'a>(&'a mut self, k: &K) -> Option<&'a mut V> { + match self.search_mut(k) { + Some(bucket) => { + let (_, v) = bucket.into_mut_refs(); + Some(v) + } + _ => None + } + } + + /// Inserts a key-value pair into the map. An existing value for a + /// key is replaced by the new value. Returns `true` if the key did + /// not already exist in the map. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// assert_eq!(map.insert(2u, "value"), true); + /// assert_eq!(map.insert(2, "value2"), false); + /// assert_eq!(map[2], "value2"); + /// ``` + #[inline] + pub fn insert(&mut self, key: K, value: V) -> bool { + self.swap(key, value).is_none() + } + + /// Removes a key-value pair from the map. Returns `true` if the key + /// was present in the map. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// assert_eq!(map.remove(&1u), false); + /// map.insert(1, "a"); + /// assert_eq!(map.remove(&1), true); + /// ``` + #[inline] + pub fn remove(&mut self, key: &K) -> bool { + self.pop(key).is_some() + } + + /// Inserts a key-value pair from the map. If the key already had a value + /// present in the map, that value is returned. Otherwise, `None` is returned. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// assert_eq!(map.swap(37u, "a"), None); + /// assert_eq!(map.is_empty(), false); + /// + /// map.insert(37, "b"); + /// assert_eq!(map.swap(37, "c"), Some("b")); + /// assert_eq!(map[37], "c"); + /// ``` + pub fn swap(&mut self, k: K, v: V) -> Option { + let hash = self.make_hash(&k); + let potential_new_size = self.table.size() + 1; + self.make_some_room(potential_new_size); + + let mut retval = None; + self.insert_or_replace_with(hash, k, v, |_, val_ref, val| { + retval = Some(replace(val_ref, val)); + }); + retval + } + + /// Removes a key from the map, returning the value at the key if the key + /// was previously in the map. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1u, "a"); + /// assert_eq!(map.pop(&1), Some("a")); + /// assert_eq!(map.pop(&1), None); + /// ``` + pub fn pop(&mut self, k: &K) -> Option { + if self.table.size() == 0 { + return None + } + + let potential_new_size = self.table.size() - 1; + self.make_some_room(potential_new_size); + + self.search_mut(k).map(|bucket| { + let (_k, val) = pop_internal(bucket); + val + }) + } +} + +fn search_entry_hashed<'a, K: Eq, V>(table: &'a mut RawTable, hash: SafeHash, k: K) + -> Entry<'a, K, V> { + // Worst case, we'll find one empty bucket among `size + 1` buckets. + let size = table.size(); + let mut probe = Bucket::new(table, &hash); + let ib = probe.index(); + + loop { + let bucket = match probe.peek() { + Empty(bucket) => { + // Found a hole! + return Vacant(VacantEntry { + hash: hash, + key: k, + elem: NoElem(bucket), + }); + }, + Full(bucket) => bucket + }; + + if bucket.hash() == hash { + let is_eq = { + let (bucket_k, _) = bucket.read(); + k == *bucket_k + }; + + if is_eq { + return Occupied(OccupiedEntry{ + elem: bucket, + }); + } + } + + let robin_ib = bucket.index() as int - bucket.distance() as int; + + if (ib as int) < robin_ib { + // Found a luckier bucket than me. Better steal his spot. + return Vacant(VacantEntry { + hash: hash, + key: k, + elem: NeqElem(bucket, robin_ib as uint), + }); + } + + probe = bucket.next(); + assert!(probe.index() != ib + size + 1); + } +} + +impl, V: Clone, S, H: Hasher> HashMap { + /// Return a copy of the value corresponding to the key. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap = HashMap::new(); + /// map.insert(1u, "foo".to_string()); + /// let s: String = map.find_copy(&1).unwrap(); + /// ``` + pub fn find_copy(&self, k: &K) -> Option { + self.find(k).map(|v| (*v).clone()) + } + + /// Return a copy of the value corresponding to the key. + /// + /// # Failure + /// + /// Fails if the key is not present. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap = HashMap::new(); + /// map.insert(1u, "foo".to_string()); + /// let s: String = map.get_copy(&1); + /// ``` + pub fn get_copy(&self, k: &K) -> V { + self[*k].clone() + } +} + +impl, V: PartialEq, S, H: Hasher> PartialEq for HashMap { + fn eq(&self, other: &HashMap) -> bool { + if self.len() != other.len() { return false; } + + self.iter().all(|(key, value)| + other.find(key).map_or(false, |v| *value == *v) + ) + } +} + +impl, V: Eq, S, H: Hasher> Eq for HashMap {} + +impl + Show, V: Show, S, H: Hasher> Show for HashMap { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + try!(write!(f, "{{")); + + for (i, (k, v)) in self.iter().enumerate() { + if i != 0 { try!(write!(f, ", ")); } + try!(write!(f, "{}: {}", *k, *v)); + } + + write!(f, "}}") + } +} + +impl, V, S, H: Hasher + Default> Default for HashMap { + fn default() -> HashMap { + HashMap::with_hasher(Default::default()) + } +} + +impl, V, S, H: Hasher> Index for HashMap { + #[inline] + fn index<'a>(&'a self, index: &K) -> &'a V { + self.find(index).expect("no entry found for key") + } +} + +impl, V, S, H: Hasher> IndexMut for HashMap { + #[inline] + fn index_mut<'a>(&'a mut self, index: &K) -> &'a mut V { + match self.find_mut(index) { + Some(v) => v, + None => panic!("no entry found for key") + } + } +} + +/// HashMap iterator +pub struct Entries<'a, K: 'a, V: 'a> { + inner: table::Entries<'a, K, V> +} + +/// HashMap mutable values iterator +pub struct 
MutEntries<'a, K: 'a, V: 'a> { + inner: table::MutEntries<'a, K, V> +} + +/// HashMap move iterator +pub struct MoveEntries { + inner: iter::Map<'static, (SafeHash, K, V), (K, V), table::MoveEntries> +} + +/// A view into a single occupied location in a HashMap +pub struct OccupiedEntry<'a, K:'a, V:'a> { + elem: FullBucket>, +} + +/// A view into a single empty location in a HashMap +pub struct VacantEntry<'a, K:'a, V:'a> { + hash: SafeHash, + key: K, + elem: VacantEntryState>, +} + +/// A view into a single location in a map, which may be vacant or occupied +pub enum Entry<'a, K:'a, V:'a> { + /// An occupied Entry + Occupied(OccupiedEntry<'a, K, V>), + /// A vacant Entry + Vacant(VacantEntry<'a, K, V>), +} + +/// Possible states of a VacantEntry +enum VacantEntryState { + /// The index is occupied, but the key to insert has precedence, + /// and will kick the current one out on insertion + NeqElem(FullBucket, uint), + /// The index is genuinely vacant + NoElem(EmptyBucket), +} + +impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { + #[inline] + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (uint, Option) { + self.inner.size_hint() + } +} + +impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { + #[inline] + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (uint, Option) { + self.inner.size_hint() + } +} + +impl Iterator<(K, V)> for MoveEntries { + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (uint, Option) { + self.inner.size_hint() + } +} + +impl<'a, K, V> OccupiedEntry<'a, K, V> { + /// Gets a reference to the value in the entry + pub fn get(&self) -> &V { + let (_, v) = self.elem.read(); + v + } + + /// Gets a mutable reference to the value in the entry + pub fn get_mut(&mut self) -> &mut V { + let (_, v) = self.elem.read_mut(); + v + } + + /// 
Converts the OccupiedEntry into a mutable reference to the value in the entry + /// with a lifetime bound to the map itself + pub fn into_mut(self) -> &'a mut V { + let (_, v) = self.elem.into_mut_refs(); + v + } + + /// Sets the value of the entry, and returns the entry's old value + pub fn set(&mut self, mut value: V) -> V { + let old_value = self.get_mut(); + mem::swap(&mut value, old_value); + value + } + + /// Takes the value out of the entry, and returns it + pub fn take(self) -> V { + let (_, _, v) = self.elem.take(); + v + } +} + +impl<'a, K, V> VacantEntry<'a, K, V> { + /// Sets the value of the entry with the VacantEntry's key, + /// and returns a mutable reference to it + pub fn set(self, value: V) -> &'a mut V { + match self.elem { + NeqElem(bucket, ib) => { + robin_hood(bucket, ib, self.hash, self.key, value) + } + NoElem(bucket) => { + let full = bucket.put(self.hash, self.key, value); + let (_, v) = full.into_mut_refs(); + v + } + } + } +} + +/// HashMap keys iterator +pub type Keys<'a, K, V> = + iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>; + +/// HashMap values iterator +pub type Values<'a, K, V> = + iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>; + +impl, V, S, H: Hasher + Default> FromIterator<(K, V)> for HashMap { + fn from_iter>(iter: T) -> HashMap { + let (lower, _) = iter.size_hint(); + let mut map = HashMap::with_capacity_and_hasher(lower, Default::default()); + map.extend(iter); + map + } +} + +impl, V, S, H: Hasher + Default> Extendable<(K, V)> for HashMap { + fn extend>(&mut self, mut iter: T) { + for (k, v) in iter { + self.insert(k, v); + } + } +} + +#[cfg(test)] +mod test_map { + use prelude::*; + + use super::HashMap; + use super::{Occupied, Vacant}; + use cmp::Equiv; + use hash; + use iter::{Iterator,range_inclusive,range_step_inclusive}; + use cell::RefCell; + + struct KindaIntLike(int); + + impl Equiv for KindaIntLike { + fn equiv(&self, other: &int) -> bool { + let KindaIntLike(this) = *self; + this 
== *other + } + } + impl hash::Hash for KindaIntLike { + fn hash(&self, state: &mut S) { + let KindaIntLike(this) = *self; + this.hash(state) + } + } + + #[test] + fn test_create_capacity_zero() { + let mut m = HashMap::with_capacity(0); + + assert!(m.insert(1i, 1i)); + + assert!(m.contains_key(&1)); + assert!(!m.contains_key(&0)); + } + + #[test] + fn test_insert() { + let mut m = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1i, 2i)); + assert_eq!(m.len(), 1); + assert!(m.insert(2i, 4i)); + assert_eq!(m.len(), 2); + assert_eq!(*m.find(&1).unwrap(), 2); + assert_eq!(*m.find(&2).unwrap(), 4); + } + + local_data_key!(drop_vector: RefCell>) + + #[deriving(Hash, PartialEq, Eq)] + struct Dropable { + k: uint + } + + impl Dropable { + fn new(k: uint) -> Dropable { + let v = drop_vector.get().unwrap(); + v.borrow_mut().as_mut_slice()[k] += 1; + + Dropable { k: k } + } + } + + impl Drop for Dropable { + fn drop(&mut self) { + let v = drop_vector.get().unwrap(); + v.borrow_mut().as_mut_slice()[self.k] -= 1; + } + } + + impl Clone for Dropable { + fn clone(&self) -> Dropable { + Dropable::new(self.k) + } + } + + #[test] + fn test_drops() { + drop_vector.replace(Some(RefCell::new(Vec::from_elem(200, 0i)))); + + { + let mut m = HashMap::new(); + + let v = drop_vector.get().unwrap(); + for i in range(0u, 200) { + assert_eq!(v.borrow().as_slice()[i], 0); + } + drop(v); + + for i in range(0u, 100) { + let d1 = Dropable::new(i); + let d2 = Dropable::new(i+100); + m.insert(d1, d2); + } + + let v = drop_vector.get().unwrap(); + for i in range(0u, 200) { + assert_eq!(v.borrow().as_slice()[i], 1); + } + drop(v); + + for i in range(0u, 50) { + let k = Dropable::new(i); + let v = m.pop(&k); + + assert!(v.is_some()); + + let v = drop_vector.get().unwrap(); + assert_eq!(v.borrow().as_slice()[i], 1); + assert_eq!(v.borrow().as_slice()[i+100], 1); + } + + let v = drop_vector.get().unwrap(); + for i in range(0u, 50) { + assert_eq!(v.borrow().as_slice()[i], 0); + 
assert_eq!(v.borrow().as_slice()[i+100], 0); + } + + for i in range(50u, 100) { + assert_eq!(v.borrow().as_slice()[i], 1); + assert_eq!(v.borrow().as_slice()[i+100], 1); + } + } + + let v = drop_vector.get().unwrap(); + for i in range(0u, 200) { + assert_eq!(v.borrow().as_slice()[i], 0); + } + } + + #[test] + fn test_move_iter_drops() { + drop_vector.replace(Some(RefCell::new(Vec::from_elem(200, 0i)))); + + let hm = { + let mut hm = HashMap::new(); + + let v = drop_vector.get().unwrap(); + for i in range(0u, 200) { + assert_eq!(v.borrow().as_slice()[i], 0); + } + drop(v); + + for i in range(0u, 100) { + let d1 = Dropable::new(i); + let d2 = Dropable::new(i+100); + hm.insert(d1, d2); + } + + let v = drop_vector.get().unwrap(); + for i in range(0u, 200) { + assert_eq!(v.borrow().as_slice()[i], 1); + } + drop(v); + + hm + }; + + // By the way, ensure that cloning doesn't screw up the dropping. + drop(hm.clone()); + + { + let mut half = hm.into_iter().take(50); + + let v = drop_vector.get().unwrap(); + for i in range(0u, 200) { + assert_eq!(v.borrow().as_slice()[i], 1); + } + drop(v); + + for _ in half {} + + let v = drop_vector.get().unwrap(); + let nk = range(0u, 100).filter(|&i| { + v.borrow().as_slice()[i] == 1 + }).count(); + + let nv = range(0u, 100).filter(|&i| { + v.borrow().as_slice()[i+100] == 1 + }).count(); + + assert_eq!(nk, 50); + assert_eq!(nv, 50); + }; + + let v = drop_vector.get().unwrap(); + for i in range(0u, 200) { + assert_eq!(v.borrow().as_slice()[i], 0); + } + } + + #[test] + fn test_empty_pop() { + let mut m: HashMap = HashMap::new(); + assert_eq!(m.pop(&0), None); + } + + #[test] + fn test_lots_of_insertions() { + let mut m = HashMap::new(); + + // Try this a few times to make sure we never screw up the hashmap's + // internal state. 
+ for _ in range(0i, 10) { + assert!(m.is_empty()); + + for i in range_inclusive(1i, 1000) { + assert!(m.insert(i, i)); + + for j in range_inclusive(1, i) { + let r = m.find(&j); + assert_eq!(r, Some(&j)); + } + + for j in range_inclusive(i+1, 1000) { + let r = m.find(&j); + assert_eq!(r, None); + } + } + + for i in range_inclusive(1001i, 2000) { + assert!(!m.contains_key(&i)); + } + + // remove forwards + for i in range_inclusive(1i, 1000) { + assert!(m.remove(&i)); + + for j in range_inclusive(1, i) { + assert!(!m.contains_key(&j)); + } + + for j in range_inclusive(i+1, 1000) { + assert!(m.contains_key(&j)); + } + } + + for i in range_inclusive(1i, 1000) { + assert!(!m.contains_key(&i)); + } + + for i in range_inclusive(1i, 1000) { + assert!(m.insert(i, i)); + } + + // remove backwards + for i in range_step_inclusive(1000i, 1, -1) { + assert!(m.remove(&i)); + + for j in range_inclusive(i, 1000) { + assert!(!m.contains_key(&j)); + } + + for j in range_inclusive(1, i-1) { + assert!(m.contains_key(&j)); + } + } + } + } + + #[test] + fn test_find_mut() { + let mut m = HashMap::new(); + assert!(m.insert(1i, 12i)); + assert!(m.insert(2i, 8i)); + assert!(m.insert(5i, 14i)); + let new = 100; + match m.find_mut(&5) { + None => panic!(), Some(x) => *x = new + } + assert_eq!(m.find(&5), Some(&new)); + } + + #[test] + fn test_insert_overwrite() { + let mut m = HashMap::new(); + assert!(m.insert(1i, 2i)); + assert_eq!(*m.find(&1).unwrap(), 2); + assert!(!m.insert(1i, 3i)); + assert_eq!(*m.find(&1).unwrap(), 3); + } + + #[test] + fn test_insert_conflicts() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1i, 2i)); + assert!(m.insert(5i, 3i)); + assert!(m.insert(9i, 4i)); + assert_eq!(*m.find(&9).unwrap(), 4); + assert_eq!(*m.find(&5).unwrap(), 3); + assert_eq!(*m.find(&1).unwrap(), 2); + } + + #[test] + fn test_conflict_remove() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1i, 2i)); + assert_eq!(*m.find(&1).unwrap(), 2); + assert!(m.insert(5, 3)); 
+ assert_eq!(*m.find(&1).unwrap(), 2); + assert_eq!(*m.find(&5).unwrap(), 3); + assert!(m.insert(9, 4)); + assert_eq!(*m.find(&1).unwrap(), 2); + assert_eq!(*m.find(&5).unwrap(), 3); + assert_eq!(*m.find(&9).unwrap(), 4); + assert!(m.remove(&1)); + assert_eq!(*m.find(&9).unwrap(), 4); + assert_eq!(*m.find(&5).unwrap(), 3); + } + + #[test] + fn test_is_empty() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1i, 2i)); + assert!(!m.is_empty()); + assert!(m.remove(&1)); + assert!(m.is_empty()); + } + + #[test] + fn test_pop() { + let mut m = HashMap::new(); + m.insert(1i, 2i); + assert_eq!(m.pop(&1), Some(2)); + assert_eq!(m.pop(&1), None); + } + + #[test] + #[allow(experimental)] + fn test_pop_equiv() { + let mut m = HashMap::new(); + m.insert(1i, 2i); + assert_eq!(m.pop_equiv(&KindaIntLike(1)), Some(2)); + assert_eq!(m.pop_equiv(&KindaIntLike(1)), None); + } + + #[test] + fn test_swap() { + let mut m = HashMap::new(); + assert_eq!(m.swap(1i, 2i), None); + assert_eq!(m.swap(1i, 3i), Some(2)); + assert_eq!(m.swap(1i, 4i), Some(3)); + } + + #[test] + fn test_iterate() { + let mut m = HashMap::with_capacity(4); + for i in range(0u, 32) { + assert!(m.insert(i, i*2)); + } + assert_eq!(m.len(), 32); + + let mut observed: u32 = 0; + + for (k, v) in m.iter() { + assert_eq!(*v, *k * 2); + observed |= 1 << *k; + } + assert_eq!(observed, 0xFFFF_FFFF); + } + + #[test] + fn test_keys() { + let vec = vec![(1i, 'a'), (2i, 'b'), (3i, 'c')]; + let map = vec.into_iter().collect::>(); + let keys = map.keys().map(|&k| k).collect::>(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&1)); + assert!(keys.contains(&2)); + assert!(keys.contains(&3)); + } + + #[test] + fn test_values() { + let vec = vec![(1i, 'a'), (2i, 'b'), (3i, 'c')]; + let map = vec.into_iter().collect::>(); + let values = map.values().map(|&v| v).collect::>(); + assert_eq!(values.len(), 3); + assert!(values.contains(&'a')); + assert!(values.contains(&'b')); + assert!(values.contains(&'c')); + } + + 
#[test] + fn test_find() { + let mut m = HashMap::new(); + assert!(m.find(&1i).is_none()); + m.insert(1i, 2i); + match m.find(&1) { + None => panic!(), + Some(v) => assert_eq!(*v, 2) + } + } + + #[test] + fn test_find_copy() { + let mut m = HashMap::new(); + assert!(m.find(&1i).is_none()); + + for i in range(1i, 10000) { + m.insert(i, i + 7); + match m.find_copy(&i) { + None => panic!(), + Some(v) => assert_eq!(v, i + 7) + } + for j in range(1i, i/100) { + match m.find_copy(&j) { + None => panic!(), + Some(v) => assert_eq!(v, j + 7) + } + } + } + } + + #[test] + fn test_eq() { + let mut m1 = HashMap::new(); + m1.insert(1i, 2i); + m1.insert(2i, 3i); + m1.insert(3i, 4i); + + let mut m2 = HashMap::new(); + m2.insert(1i, 2i); + m2.insert(2i, 3i); + + assert!(m1 != m2); + + m2.insert(3i, 4i); + + assert_eq!(m1, m2); + } + + #[test] + fn test_show() { + let mut map: HashMap = HashMap::new(); + let empty: HashMap = HashMap::new(); + + map.insert(1i, 2i); + map.insert(3i, 4i); + + let map_str = format!("{}", map); + + assert!(map_str == "{1: 2, 3: 4}".to_string() || map_str == "{3: 4, 1: 2}".to_string()); + assert_eq!(format!("{}", empty), "{}".to_string()); + } + + #[test] + fn test_expand() { + let mut m = HashMap::new(); + + assert_eq!(m.len(), 0); + assert!(m.is_empty()); + + let mut i = 0u; + let old_cap = m.table.capacity(); + while old_cap == m.table.capacity() { + m.insert(i, i); + i += 1; + } + + assert_eq!(m.len(), i); + assert!(!m.is_empty()); + } + + #[test] + fn test_resize_policy() { + let mut m = HashMap::new(); + + assert_eq!(m.len(), 0); + assert_eq!(m.table.capacity(), 0); + assert!(m.is_empty()); + + m.insert(0, 0); + m.remove(&0); + assert!(m.is_empty()); + let initial_cap = m.table.capacity(); + m.reserve(initial_cap * 2); + let cap = m.table.capacity(); + + assert_eq!(cap, initial_cap * 2); + + let mut i = 0u; + for _ in range(0, cap * 3 / 4) { + m.insert(i, i); + i += 1; + } + // three quarters full + + assert_eq!(m.len(), i); + 
assert_eq!(m.table.capacity(), cap); + + for _ in range(0, cap / 4) { + m.insert(i, i); + i += 1; + } + // half full + + let new_cap = m.table.capacity(); + assert_eq!(new_cap, cap * 2); + + for _ in range(0, cap / 2 - 1) { + i -= 1; + m.remove(&i); + assert_eq!(m.table.capacity(), new_cap); + } + // A little more than one quarter full. + // Shrinking starts as we remove more elements: + for _ in range(0, cap / 2 - 1) { + i -= 1; + m.remove(&i); + } + + assert_eq!(m.len(), i); + assert!(!m.is_empty()); + assert_eq!(m.table.capacity(), cap); + } + + #[test] + fn test_find_equiv() { + let mut m = HashMap::new(); + + let (foo, bar, baz) = (1i,2i,3i); + m.insert("foo".to_string(), foo); + m.insert("bar".to_string(), bar); + m.insert("baz".to_string(), baz); + + + assert_eq!(m.find_equiv("foo"), Some(&foo)); + assert_eq!(m.find_equiv("bar"), Some(&bar)); + assert_eq!(m.find_equiv("baz"), Some(&baz)); + + assert_eq!(m.find_equiv("qux"), None); + } + + #[test] + fn test_from_iter() { + let xs = [(1i, 1i), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let map: HashMap = xs.iter().map(|&x| x).collect(); + + for &(k, v) in xs.iter() { + assert_eq!(map.find(&k), Some(&v)); + } + } + + #[test] + fn test_size_hint() { + let xs = [(1i, 1i), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let map: HashMap = xs.iter().map(|&x| x).collect(); + + let mut iter = map.iter(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.size_hint(), (3, Some(3))); + } + + #[test] + fn test_mut_size_hint() { + let xs = [(1i, 1i), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let mut map: HashMap = xs.iter().map(|&x| x).collect(); + + let mut iter = map.iter_mut(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.size_hint(), (3, Some(3))); + } + + #[test] + fn test_index() { + let mut map: HashMap = HashMap::new(); + + map.insert(1, 2); + map.insert(2, 1); + map.insert(3, 4); + + assert_eq!(map[2], 1); + } + + #[test] + #[should_fail] + fn test_index_nonexistent() { + let mut map: 
HashMap = HashMap::new(); + + map.insert(1, 2); + map.insert(2, 1); + map.insert(3, 4); + + map[4]; + } + + #[test] + fn test_entry(){ + let xs = [(1i, 10i), (2, 20), (3, 30), (4, 40), (5, 50), (6, 60)]; + + let mut map: HashMap = xs.iter().map(|&x| x).collect(); + + // Existing key (insert) + match map.entry(1) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + assert_eq!(view.get(), &10); + assert_eq!(view.set(100), 10); + } + } + assert_eq!(map.find(&1).unwrap(), &100); + assert_eq!(map.len(), 6); + + + // Existing key (update) + match map.entry(2) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + let v = view.get_mut(); + let new_v = (*v) * 10; + *v = new_v; + } + } + assert_eq!(map.find(&2).unwrap(), &200); + assert_eq!(map.len(), 6); + + // Existing key (take) + match map.entry(3) { + Vacant(_) => unreachable!(), + Occupied(view) => { + assert_eq!(view.take(), 30); + } + } + assert_eq!(map.find(&3), None); + assert_eq!(map.len(), 5); + + + // Inexistent key (insert) + match map.entry(10) { + Occupied(_) => unreachable!(), + Vacant(view) => { + assert_eq!(*view.set(1000), 1000); + } + } + assert_eq!(map.find(&10).unwrap(), &1000); + assert_eq!(map.len(), 6); + } +} diff --git a/src/libstd/collections/hash/mod.rs b/src/libstd/collections/hash/mod.rs new file mode 100644 index 00000000000..ee3fc1e6ac3 --- /dev/null +++ b/src/libstd/collections/hash/mod.rs @@ -0,0 +1,16 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! 
Unordered containers, implemented as hash-tables + +mod bench; +pub mod map; +pub mod set; +mod table; diff --git a/src/libstd/collections/hash/set.rs b/src/libstd/collections/hash/set.rs new file mode 100644 index 00000000000..823bd49d7a6 --- /dev/null +++ b/src/libstd/collections/hash/set.rs @@ -0,0 +1,834 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +// +// ignore-lexer-test FIXME #15883 + +use clone::Clone; +use cmp::{Eq, Equiv, PartialEq}; +use core::kinds::Sized; +use default::Default; +use fmt::Show; +use fmt; +use hash::{Hash, Hasher, RandomSipHasher}; +use iter::{Iterator, FromIterator, FilterMap, Chain, Repeat, Zip, Extendable}; +use iter; +use option::{Some, None}; +use result::{Ok, Err}; + +use super::map::{HashMap, Entries, MoveEntries, INITIAL_CAPACITY}; + + +// Future Optimization (FIXME!) +// ============================= +// +// Iteration over zero sized values is a noop. There is no need +// for `bucket.val` in the case of HashSet. I suppose we would need HKT +// to get rid of it properly. + +/// An implementation of a hash set using the underlying representation of a +/// HashMap where the value is (). As with the `HashMap` type, a `HashSet` +/// requires that the elements implement the `Eq` and `Hash` traits. +/// +/// # Example +/// +/// ``` +/// use std::collections::HashSet; +/// // Type inference lets us omit an explicit type signature (which +/// // would be `HashSet<&str>` in this example). +/// let mut books = HashSet::new(); +/// +/// // Add some books. 
+/// books.insert("A Dance With Dragons"); +/// books.insert("To Kill a Mockingbird"); +/// books.insert("The Odyssey"); +/// books.insert("The Great Gatsby"); +/// +/// // Check for a specific one. +/// if !books.contains(&("The Winds of Winter")) { +/// println!("We have {} books, but The Winds of Winter ain't one.", +/// books.len()); +/// } +/// +/// // Remove a book. +/// books.remove(&"The Odyssey"); +/// +/// // Iterate over everything. +/// for book in books.iter() { +/// println!("{}", *book); +/// } +/// ``` +/// +/// The easiest way to use `HashSet` with a custom type is to derive +/// `Eq` and `Hash`. We must also derive `PartialEq`, this will in the +/// future be implied by `Eq`. +/// +/// ``` +/// use std::collections::HashSet; +/// #[deriving(Hash, Eq, PartialEq, Show)] +/// struct Viking<'a> { +/// name: &'a str, +/// power: uint, +/// } +/// +/// let mut vikings = HashSet::new(); +/// +/// vikings.insert(Viking { name: "Einar", power: 9u }); +/// vikings.insert(Viking { name: "Einar", power: 9u }); +/// vikings.insert(Viking { name: "Olaf", power: 4u }); +/// vikings.insert(Viking { name: "Harald", power: 8u }); +/// +/// // Use derived implementation to print the vikings. +/// for x in vikings.iter() { +/// println!("{}", x); +/// } +/// ``` +#[deriving(Clone)] +pub struct HashSet { + map: HashMap +} + +impl HashSet { + /// Create an empty HashSet. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet = HashSet::new(); + /// ``` + #[inline] + pub fn new() -> HashSet { + HashSet::with_capacity(INITIAL_CAPACITY) + } + + /// Create an empty HashSet with space for at least `n` elements in + /// the hash table. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet = HashSet::with_capacity(10); + /// ``` + #[inline] + pub fn with_capacity(capacity: uint) -> HashSet { + HashSet { map: HashMap::with_capacity(capacity) } + } +} + +impl, S, H: Hasher> HashSet { + /// Creates a new empty hash set which will use the given hasher to hash + /// keys. + /// + /// The hash set is also created with the default initial capacity. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// use std::hash::sip::SipHasher; + /// + /// let h = SipHasher::new(); + /// let mut set = HashSet::with_hasher(h); + /// set.insert(2u); + /// ``` + #[inline] + pub fn with_hasher(hasher: H) -> HashSet { + HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher) + } + + /// Create an empty HashSet with space for at least `capacity` + /// elements in the hash table, using `hasher` to hash the keys. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// use std::hash::sip::SipHasher; + /// + /// let h = SipHasher::new(); + /// let mut set = HashSet::with_capacity_and_hasher(10u, h); + /// set.insert(1i); + /// ``` + #[inline] + pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet { + HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) } + } + + /// Reserve space for at least `n` elements in the hash table. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet = HashSet::new(); + /// set.reserve(10); + /// ``` + pub fn reserve(&mut self, n: uint) { + self.map.reserve(n) + } + + /// Returns true if the hash set contains a value equivalent to the + /// given query value. + /// + /// # Example + /// + /// This is a slightly silly example where we define the number's + /// parity as the equivalance class. It is important that the + /// values hash the same, which is why we implement `Hash`. + /// + /// ``` + /// use std::collections::HashSet; + /// use std::hash::Hash; + /// use std::hash::sip::SipState; + /// + /// #[deriving(Eq, PartialEq)] + /// struct EvenOrOdd { + /// num: uint + /// }; + /// + /// impl Hash for EvenOrOdd { + /// fn hash(&self, state: &mut SipState) { + /// let parity = self.num % 2; + /// parity.hash(state); + /// } + /// } + /// + /// impl Equiv for EvenOrOdd { + /// fn equiv(&self, other: &EvenOrOdd) -> bool { + /// self.num % 2 == other.num % 2 + /// } + /// } + /// + /// let mut set = HashSet::new(); + /// set.insert(EvenOrOdd { num: 3u }); + /// + /// assert!(set.contains_equiv(&EvenOrOdd { num: 3u })); + /// assert!(set.contains_equiv(&EvenOrOdd { num: 5u })); + /// assert!(!set.contains_equiv(&EvenOrOdd { num: 4u })); + /// assert!(!set.contains_equiv(&EvenOrOdd { num: 2u })); + /// + /// ``` + pub fn contains_equiv + Equiv>(&self, value: &Q) -> bool { + self.map.contains_key_equiv(value) + } + + /// An iterator visiting all elements in arbitrary order. + /// Iterator element type is &'a T. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set = HashSet::new(); + /// set.insert("a"); + /// set.insert("b"); + /// + /// // Will print in an arbitrary order. + /// for x in set.iter() { + /// println!("{}", x); + /// } + /// ``` + pub fn iter<'a>(&'a self) -> SetItems<'a, T> { + self.map.keys() + } + + /// Deprecated: use `into_iter`. 
+ #[deprecated = "use into_iter"] + pub fn move_iter(self) -> SetMoveItems { + self.into_iter() + } + + /// Creates a consuming iterator, that is, one that moves each value out + /// of the set in arbitrary order. The set cannot be used after calling + /// this. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set = HashSet::new(); + /// set.insert("a".to_string()); + /// set.insert("b".to_string()); + /// + /// // Not possible to collect to a Vec with a regular `.iter()`. + /// let v: Vec = set.into_iter().collect(); + /// + /// // Will print in an arbitrary order. + /// for x in v.iter() { + /// println!("{}", x); + /// } + /// ``` + pub fn into_iter(self) -> SetMoveItems { + self.map.into_iter().map(|(k, _)| k) + } + + /// Visit the values representing the difference. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet = [1i, 2, 3].iter().map(|&x| x).collect(); + /// let b: HashSet = [4i, 2, 3, 4].iter().map(|&x| x).collect(); + /// + /// // Can be seen as `a - b`. + /// for x in a.difference(&b) { + /// println!("{}", x); // Print 1 + /// } + /// + /// let diff: HashSet = a.difference(&b).map(|&x| x).collect(); + /// assert_eq!(diff, [1i].iter().map(|&x| x).collect()); + /// + /// // Note that difference is not symmetric, + /// // and `b - a` means something else: + /// let diff: HashSet = b.difference(&a).map(|&x| x).collect(); + /// assert_eq!(diff, [4i].iter().map(|&x| x).collect()); + /// ``` + pub fn difference<'a>(&'a self, other: &'a HashSet) -> SetAlgebraItems<'a, T, H> { + Repeat::new(other).zip(self.iter()) + .filter_map(|(other, elt)| { + if !other.contains(elt) { Some(elt) } else { None } + }) + } + + /// Visit the values representing the symmetric difference. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet = [1i, 2, 3].iter().map(|&x| x).collect(); + /// let b: HashSet = [4i, 2, 3, 4].iter().map(|&x| x).collect(); + /// + /// // Print 1, 4 in arbitrary order. + /// for x in a.symmetric_difference(&b) { + /// println!("{}", x); + /// } + /// + /// let diff1: HashSet = a.symmetric_difference(&b).map(|&x| x).collect(); + /// let diff2: HashSet = b.symmetric_difference(&a).map(|&x| x).collect(); + /// + /// assert_eq!(diff1, diff2); + /// assert_eq!(diff1, [1i, 4].iter().map(|&x| x).collect()); + /// ``` + pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet) + -> Chain, SetAlgebraItems<'a, T, H>> { + self.difference(other).chain(other.difference(self)) + } + + /// Visit the values representing the intersection. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet = [1i, 2, 3].iter().map(|&x| x).collect(); + /// let b: HashSet = [4i, 2, 3, 4].iter().map(|&x| x).collect(); + /// + /// // Print 2, 3 in arbitrary order. + /// for x in a.intersection(&b) { + /// println!("{}", x); + /// } + /// + /// let diff: HashSet = a.intersection(&b).map(|&x| x).collect(); + /// assert_eq!(diff, [2i, 3].iter().map(|&x| x).collect()); + /// ``` + pub fn intersection<'a>(&'a self, other: &'a HashSet) + -> SetAlgebraItems<'a, T, H> { + Repeat::new(other).zip(self.iter()) + .filter_map(|(other, elt)| { + if other.contains(elt) { Some(elt) } else { None } + }) + } + + /// Visit the values representing the union. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet = [1i, 2, 3].iter().map(|&x| x).collect(); + /// let b: HashSet = [4i, 2, 3, 4].iter().map(|&x| x).collect(); + /// + /// // Print 1, 2, 3, 4 in arbitrary order. 
+ /// for x in a.union(&b) { + /// println!("{}", x); + /// } + /// + /// let diff: HashSet = a.union(&b).map(|&x| x).collect(); + /// assert_eq!(diff, [1i, 2, 3, 4].iter().map(|&x| x).collect()); + /// ``` + pub fn union<'a>(&'a self, other: &'a HashSet) + -> Chain, SetAlgebraItems<'a, T, H>> { + self.iter().chain(other.difference(self)) + } + + /// Return the number of elements in the set + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// assert_eq!(v.len(), 0); + /// v.insert(1u); + /// assert_eq!(v.len(), 1); + /// ``` + pub fn len(&self) -> uint { self.map.len() } + + /// Returns true if the set contains no elements + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// assert!(v.is_empty()); + /// v.insert(1u); + /// assert!(!v.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { self.map.len() == 0 } + + /// Clears the set, removing all values. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// v.insert(1u); + /// v.clear(); + /// assert!(v.is_empty()); + /// ``` + pub fn clear(&mut self) { self.map.clear() } + + /// Returns `true` if the set contains a value. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let set: HashSet = [1, 2, 3].iter().map(|&x| x).collect(); + /// assert_eq!(set.contains(&1), true); + /// assert_eq!(set.contains(&4), false); + /// ``` + pub fn contains(&self, value: &T) -> bool { self.map.contains_key(value) } + + /// Returns `true` if the set has no elements in common with `other`. + /// This is equivalent to checking for an empty intersection. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet = [1, 2, 3].iter().map(|&x| x).collect(); + /// let mut b: HashSet = HashSet::new(); + /// + /// assert_eq!(a.is_disjoint(&b), true); + /// b.insert(4); + /// assert_eq!(a.is_disjoint(&b), true); + /// b.insert(1); + /// assert_eq!(a.is_disjoint(&b), false); + /// ``` + pub fn is_disjoint(&self, other: &HashSet) -> bool { + self.iter().all(|v| !other.contains(v)) + } + + /// Returns `true` if the set is a subset of another. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let sup: HashSet = [1, 2, 3].iter().map(|&x| x).collect(); + /// let mut set: HashSet = HashSet::new(); + /// + /// assert_eq!(set.is_subset(&sup), true); + /// set.insert(2); + /// assert_eq!(set.is_subset(&sup), true); + /// set.insert(4); + /// assert_eq!(set.is_subset(&sup), false); + /// ``` + pub fn is_subset(&self, other: &HashSet) -> bool { + self.iter().all(|v| other.contains(v)) + } + + /// Returns `true` if the set is a superset of another. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let sub: HashSet = [1, 2].iter().map(|&x| x).collect(); + /// let mut set: HashSet = HashSet::new(); + /// + /// assert_eq!(set.is_superset(&sub), false); + /// + /// set.insert(0); + /// set.insert(1); + /// assert_eq!(set.is_superset(&sub), false); + /// + /// set.insert(2); + /// assert_eq!(set.is_superset(&sub), true); + /// ``` + #[inline] + pub fn is_superset(&self, other: &HashSet) -> bool { + other.is_subset(self) + } + + /// Adds a value to the set. Returns `true` if the value was not already + /// present in the set. 
+ /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set = HashSet::new(); + /// + /// assert_eq!(set.insert(2u), true); + /// assert_eq!(set.insert(2), false); + /// assert_eq!(set.len(), 1); + /// ``` + pub fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) } + + /// Removes a value from the set. Returns `true` if the value was + /// present in the set. + /// + /// # Example + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set = HashSet::new(); + /// + /// set.insert(2u); + /// assert_eq!(set.remove(&2), true); + /// assert_eq!(set.remove(&2), false); + /// ``` + pub fn remove(&mut self, value: &T) -> bool { self.map.remove(value) } +} + +impl, S, H: Hasher> PartialEq for HashSet { + fn eq(&self, other: &HashSet) -> bool { + if self.len() != other.len() { return false; } + + self.iter().all(|key| other.contains(key)) + } +} + +impl, S, H: Hasher> Eq for HashSet {} + +impl + fmt::Show, S, H: Hasher> fmt::Show for HashSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + try!(write!(f, "{{")); + + for (i, x) in self.iter().enumerate() { + if i != 0 { try!(write!(f, ", ")); } + try!(write!(f, "{}", *x)); + } + + write!(f, "}}") + } +} + +impl, S, H: Hasher + Default> FromIterator for HashSet { + fn from_iter>(iter: I) -> HashSet { + let (lower, _) = iter.size_hint(); + let mut set = HashSet::with_capacity_and_hasher(lower, Default::default()); + set.extend(iter); + set + } +} + +impl, S, H: Hasher + Default> Extendable for HashSet { + fn extend>(&mut self, mut iter: I) { + for k in iter { + self.insert(k); + } + } +} + +impl, S, H: Hasher + Default> Default for HashSet { + fn default() -> HashSet { + HashSet::with_hasher(Default::default()) + } +} + +/// HashSet iterator +pub type SetItems<'a, K> = + iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>; + +/// HashSet move iterator +pub type SetMoveItems = + iter::Map<'static, (K, ()), K, MoveEntries>; + +// 
`Repeat` is used to feed the filter closure an explicit capture +// of a reference to the other set +/// Set operations iterator +pub type SetAlgebraItems<'a, T, H> = + FilterMap<'static, (&'a HashSet, &'a T), &'a T, + Zip>, SetItems<'a, T>>>; + +#[cfg(test)] +mod test_set { + use prelude::*; + + use super::HashSet; + use slice::ImmutablePartialEqSlice; + + #[test] + fn test_disjoint() { + let mut xs = HashSet::new(); + let mut ys = HashSet::new(); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(xs.insert(5i)); + assert!(ys.insert(11i)); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(xs.insert(7)); + assert!(xs.insert(19)); + assert!(xs.insert(4)); + assert!(ys.insert(2)); + assert!(ys.insert(-11)); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(ys.insert(7)); + assert!(!xs.is_disjoint(&ys)); + assert!(!ys.is_disjoint(&xs)); + } + + #[test] + fn test_subset_and_superset() { + let mut a = HashSet::new(); + assert!(a.insert(0i)); + assert!(a.insert(5)); + assert!(a.insert(11)); + assert!(a.insert(7)); + + let mut b = HashSet::new(); + assert!(b.insert(0i)); + assert!(b.insert(7)); + assert!(b.insert(19)); + assert!(b.insert(250)); + assert!(b.insert(11)); + assert!(b.insert(200)); + + assert!(!a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(!b.is_superset(&a)); + + assert!(b.insert(5)); + + assert!(a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(b.is_superset(&a)); + } + + #[test] + fn test_iterate() { + let mut a = HashSet::new(); + for i in range(0u, 32) { + assert!(a.insert(i)); + } + let mut observed: u32 = 0; + for k in a.iter() { + observed |= 1 << *k; + } + assert_eq!(observed, 0xFFFF_FFFF); + } + + #[test] + fn test_intersection() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(11i)); + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(77)); + 
assert!(a.insert(103)); + assert!(a.insert(5)); + assert!(a.insert(-5)); + + assert!(b.insert(2i)); + assert!(b.insert(11)); + assert!(b.insert(77)); + assert!(b.insert(-9)); + assert!(b.insert(-42)); + assert!(b.insert(5)); + assert!(b.insert(3)); + + let mut i = 0; + let expected = [3, 5, 11, 77]; + for x in a.intersection(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_difference() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1i)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(3i)); + assert!(b.insert(9)); + + let mut i = 0; + let expected = [1, 5, 11]; + for x in a.difference(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_symmetric_difference() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1i)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(-2i)); + assert!(b.insert(3)); + assert!(b.insert(9)); + assert!(b.insert(14)); + assert!(b.insert(22)); + + let mut i = 0; + let expected = [-2, 1, 5, 11, 14, 22]; + for x in a.symmetric_difference(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_union() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1i)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + assert!(a.insert(16)); + assert!(a.insert(19)); + assert!(a.insert(24)); + + assert!(b.insert(-2i)); + assert!(b.insert(1)); + assert!(b.insert(5)); + assert!(b.insert(9)); + assert!(b.insert(13)); + assert!(b.insert(19)); + + let mut i = 0; + let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24]; + for x in a.union(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, 
expected.len()); + } + + #[test] + fn test_from_iter() { + let xs = [1i, 2, 3, 4, 5, 6, 7, 8, 9]; + + let set: HashSet = xs.iter().map(|&x| x).collect(); + + for x in xs.iter() { + assert!(set.contains(x)); + } + } + + #[test] + fn test_move_iter() { + let hs = { + let mut hs = HashSet::new(); + + hs.insert('a'); + hs.insert('b'); + + hs + }; + + let v = hs.into_iter().collect::>(); + assert!(['a', 'b'] == v.as_slice() || ['b', 'a'] == v.as_slice()); + } + + #[test] + fn test_eq() { + // These constants once happened to expose a bug in insert(). + // I'm keeping them around to prevent a regression. + let mut s1 = HashSet::new(); + + s1.insert(1i); + s1.insert(2); + s1.insert(3); + + let mut s2 = HashSet::new(); + + s2.insert(1i); + s2.insert(2); + + assert!(s1 != s2); + + s2.insert(3); + + assert_eq!(s1, s2); + } + + #[test] + fn test_show() { + let mut set: HashSet = HashSet::new(); + let empty: HashSet = HashSet::new(); + + set.insert(1i); + set.insert(2); + + let set_str = format!("{}", set); + + assert!(set_str == "{1, 2}".to_string() || set_str == "{2, 1}".to_string()); + assert_eq!(format!("{}", empty), "{}".to_string()); + } +} diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs new file mode 100644 index 00000000000..4d73029b7b0 --- /dev/null +++ b/src/libstd/collections/hash/table.rs @@ -0,0 +1,907 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+// +// ignore-lexer-test FIXME #15883 + +use clone::Clone; +use cmp; +use hash::{Hash, Hasher}; +use iter::{Iterator, count}; +use kinds::{Sized, marker}; +use mem::{min_align_of, size_of}; +use mem; +use num::{CheckedAdd, CheckedMul, is_power_of_two}; +use ops::{Deref, DerefMut, Drop}; +use option::{Some, None, Option}; +use ptr::{RawPtr, copy_nonoverlapping_memory, zero_memory}; +use ptr; +use rt::heap::{allocate, deallocate}; + +const EMPTY_BUCKET: u64 = 0u64; + +/// The raw hashtable, providing safe-ish access to the unzipped and highly +/// optimized arrays of hashes, keys, and values. +/// +/// This design uses less memory and is a lot faster than the naive +/// `Vec>`, because we don't pay for the overhead of an +/// option on every element, and we get a generally more cache-aware design. +/// +/// Essential invariants of this structure: +/// +/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` +/// points to 'undefined' contents. Don't read from it. This invariant is +/// enforced outside this module with the `EmptyBucket`, `FullBucket`, +/// and `SafeHash` types. +/// +/// - An `EmptyBucket` is only constructed at an index with +/// a hash of EMPTY_BUCKET. +/// +/// - A `FullBucket` is only constructed at an index with a +/// non-EMPTY_BUCKET hash. +/// +/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get +/// around hashes of zero by changing them to 0x8000_0000_0000_0000, +/// which will likely map to the same bucket, while not being confused +/// with "empty". +/// +/// - All three "arrays represented by pointers" are the same length: +/// `capacity`. This is set at creation and never changes. The arrays +/// are unzipped to save space (we don't have to pay for the padding +/// between odd sized elements, such as in a map from u64 to u8), and +/// be more cache aware (scanning through 8 hashes brings in at most +/// 2 cache lines, since they're all right beside each other). 
+/// +/// You can kind of think of this module/data structure as a safe wrapper +/// around just the "table" part of the hashtable. It enforces some +/// invariants at the type level and employs some performance trickery, +/// but in general is just a tricked out `Vec>`. +#[unsafe_no_drop_flag] +pub struct RawTable { + capacity: uint, + size: uint, + hashes: *mut u64, + // Because K/V do not appear directly in any of the types in the struct, + // inform rustc that in fact instances of K and V are reachable from here. + marker: marker::CovariantType<(K,V)>, +} + +struct RawBucket { + hash: *mut u64, + key: *mut K, + val: *mut V +} + +pub struct Bucket { + raw: RawBucket, + idx: uint, + table: M +} + +pub struct EmptyBucket { + raw: RawBucket, + idx: uint, + table: M +} + +pub struct FullBucket { + raw: RawBucket, + idx: uint, + table: M +} + +pub type EmptyBucketImm<'table, K, V> = EmptyBucket>; +pub type FullBucketImm<'table, K, V> = FullBucket>; + +pub type EmptyBucketMut<'table, K, V> = EmptyBucket>; +pub type FullBucketMut<'table, K, V> = FullBucket>; + +pub enum BucketState { + Empty(EmptyBucket), + Full(FullBucket), +} + +// A GapThenFull encapsulates the state of two consecutive buckets at once. +// The first bucket, called the gap, is known to be empty. +// The second bucket is full. +struct GapThenFull { + gap: EmptyBucket, + full: FullBucket, +} + +/// A hash that is not zero, since we use a hash of zero to represent empty +/// buckets. +#[deriving(PartialEq)] +pub struct SafeHash { + hash: u64, +} + +impl SafeHash { + /// Peek at the hash value, which is guaranteed to be non-zero. + #[inline(always)] + pub fn inspect(&self) -> u64 { self.hash } +} + +/// We need to remove hashes of 0. That's reserved for empty buckets. +/// This function wraps up `hash_keyed` to be the only way outside this +/// module to generate a SafeHash. 
+pub fn make_hash, S, H: Hasher>(hasher: &H, t: &T) -> SafeHash { + match hasher.hash(t) { + // This constant is exceedingly likely to hash to the same + // bucket, but it won't be counted as empty! Just so we can maintain + // our precious uniform distribution of initial indexes. + EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 }, + h => SafeHash { hash: h }, + } +} + +// `replace` casts a `*u64` to a `*SafeHash`. Since we statically +// ensure that a `FullBucket` points to an index with a non-zero hash, +// and a `SafeHash` is just a `u64` with a different name, this is +// safe. +// +// This test ensures that a `SafeHash` really IS the same size as a +// `u64`. If you need to change the size of `SafeHash` (and +// consequently made this test fail), `replace` needs to be +// modified to no longer assume this. +#[test] +fn can_alias_safehash_as_u64() { + assert_eq!(size_of::(), size_of::()) +} + +impl RawBucket { + unsafe fn offset(self, count: int) -> RawBucket { + RawBucket { + hash: self.hash.offset(count), + key: self.key.offset(count), + val: self.val.offset(count), + } + } +} + +// For parameterizing over mutability. +impl<'t, K, V> Deref> for &'t RawTable { + fn deref(&self) -> &RawTable { + &**self + } +} + +impl<'t, K, V> Deref> for &'t mut RawTable { + fn deref(&self) -> &RawTable { + &**self + } +} + +impl<'t, K, V> DerefMut> for &'t mut RawTable { + fn deref_mut(&mut self) -> &mut RawTable { + &mut **self + } +} + +// Buckets hold references to the table. +impl FullBucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.table + } + /// Move out the reference to the table. + pub fn into_table(self) -> M { + self.table + } + /// Get the raw index. + pub fn index(&self) -> uint { + self.idx + } +} + +impl EmptyBucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.table + } + /// Move out the reference to the table. 
+ pub fn into_table(self) -> M { + self.table + } +} + +impl Bucket { + /// Move out the reference to the table. + pub fn into_table(self) -> M { + self.table + } + /// Get the raw index. + pub fn index(&self) -> uint { + self.idx + } +} + +impl>> Bucket { + pub fn new(table: M, hash: &SafeHash) -> Bucket { + Bucket::at_index(table, hash.inspect() as uint) + } + + pub fn at_index(table: M, ib_index: uint) -> Bucket { + let ib_index = ib_index & (table.capacity() - 1); + Bucket { + raw: unsafe { + table.first_bucket_raw().offset(ib_index as int) + }, + idx: ib_index, + table: table + } + } + + pub fn first(table: M) -> Bucket { + Bucket { + raw: table.first_bucket_raw(), + idx: 0, + table: table + } + } + + /// Reads a bucket at a given index, returning an enum indicating whether + /// it's initialized or not. You need to match on this enum to get + /// the appropriate types to call most of the other functions in + /// this module. + pub fn peek(self) -> BucketState { + match unsafe { *self.raw.hash } { + EMPTY_BUCKET => + Empty(EmptyBucket { + raw: self.raw, + idx: self.idx, + table: self.table + }), + _ => + Full(FullBucket { + raw: self.raw, + idx: self.idx, + table: self.table + }) + } + } + + /// Modifies the bucket pointer in place to make it point to the next slot. + pub fn next(&mut self) { + // Branchless bucket iteration step. + // As we reach the end of the table... + // We take the current idx: 0111111b + // Xor it by its increment: ^ 1000000b + // ------------ + // 1111111b + // Then AND with the capacity: & 1000000b + // ------------ + // to get the backwards offset: 1000000b + // ... and it's zero at all other times. + let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity(); + // Finally, we obtain the offset 1 or the offset -cap + 1. 
+ let dist = 1i - (maybe_wraparound_dist as int); + + self.idx += 1; + + unsafe { + self.raw = self.raw.offset(dist); + } + } +} + +impl>> EmptyBucket { + #[inline] + pub fn next(self) -> Bucket { + let mut bucket = self.into_bucket(); + bucket.next(); + bucket + } + + #[inline] + pub fn into_bucket(self) -> Bucket { + Bucket { + raw: self.raw, + idx: self.idx, + table: self.table + } + } + + pub fn gap_peek(self) -> Option> { + let gap = EmptyBucket { + raw: self.raw, + idx: self.idx, + table: () + }; + + match self.next().peek() { + Full(bucket) => { + Some(GapThenFull { + gap: gap, + full: bucket + }) + } + Empty(..) => None + } + } +} + +impl>> EmptyBucket { + /// Puts given key and value pair, along with the key's hash, + /// into this bucket in the hashtable. Note how `self` is 'moved' into + /// this function, because this slot will no longer be empty when + /// we return! A `FullBucket` is returned for later use, pointing to + /// the newly-filled slot in the hashtable. + /// + /// Use `make_hash` to construct a `SafeHash` to pass to this function. + pub fn put(mut self, hash: SafeHash, key: K, value: V) + -> FullBucket { + unsafe { + *self.raw.hash = hash.inspect(); + ptr::write(self.raw.key, key); + ptr::write(self.raw.val, value); + } + + self.table.size += 1; + + FullBucket { raw: self.raw, idx: self.idx, table: self.table } + } +} + +impl>> FullBucket { + #[inline] + pub fn next(self) -> Bucket { + let mut bucket = self.into_bucket(); + bucket.next(); + bucket + } + + #[inline] + pub fn into_bucket(self) -> Bucket { + Bucket { + raw: self.raw, + idx: self.idx, + table: self.table + } + } + + /// Get the distance between this bucket and the 'ideal' location + /// as determined by the key's hash stored in it. + /// + /// In the cited blog posts above, this is called the "distance to + /// initial bucket", or DIB. Also known as "probe count". 
+ pub fn distance(&self) -> uint { + // Calculates the distance one has to travel when going from + // `hash mod capacity` onwards to `idx mod capacity`, wrapping around + // if the destination is not reached before the end of the table. + (self.idx - self.hash().inspect() as uint) & (self.table.capacity() - 1) + } + + #[inline] + pub fn hash(&self) -> SafeHash { + unsafe { + SafeHash { + hash: *self.raw.hash + } + } + } + + /// Gets references to the key and value at a given index. + pub fn read(&self) -> (&K, &V) { + unsafe { + (&*self.raw.key, + &*self.raw.val) + } + } +} + +impl>> FullBucket { + /// Removes this bucket's key and value from the hashtable. + /// + /// This works similarly to `put`, building an `EmptyBucket` out of the + /// taken bucket. + pub fn take(mut self) -> (EmptyBucket, K, V) { + let key = self.raw.key as *const K; + let val = self.raw.val as *const V; + + self.table.size -= 1; + + unsafe { + *self.raw.hash = EMPTY_BUCKET; + ( + EmptyBucket { + raw: self.raw, + idx: self.idx, + table: self.table + }, + ptr::read(key), + ptr::read(val) + ) + } + } + + pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { + unsafe { + let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); + let old_key = ptr::replace(self.raw.key, k); + let old_val = ptr::replace(self.raw.val, v); + + (old_hash, old_key, old_val) + } + } + + /// Gets mutable references to the key and value at a given index. + pub fn read_mut(&mut self) -> (&mut K, &mut V) { + unsafe { + (&mut *self.raw.key, + &mut *self.raw.val) + } + } +} + +impl<'t, K, V, M: Deref> + 't> FullBucket { + /// Exchange a bucket state for immutable references into the table. + /// Because the underlying reference to the table is also consumed, + /// no further changes to the structure of the table are possible; + /// in exchange for this, the returned references have a longer lifetime + /// than the references returned by `read()`. 
+ pub fn into_refs(self) -> (&'t K, &'t V) { + unsafe { + (&*self.raw.key, + &*self.raw.val) + } + } +} + +impl<'t, K, V, M: DerefMut> + 't> FullBucket { + /// This works similarly to `into_refs`, exchanging a bucket state + /// for mutable references into the table. + pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { + unsafe { + (&mut *self.raw.key, + &mut *self.raw.val) + } + } +} + +impl BucketState { + // For convenience. + pub fn expect_full(self) -> FullBucket { + match self { + Full(full) => full, + Empty(..) => panic!("Expected full bucket") + } + } +} + +impl>> GapThenFull { + #[inline] + pub fn full(&self) -> &FullBucket { + &self.full + } + + pub fn shift(mut self) -> Option> { + unsafe { + *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); + copy_nonoverlapping_memory(self.gap.raw.key, self.full.raw.key as *const K, 1); + copy_nonoverlapping_memory(self.gap.raw.val, self.full.raw.val as *const V, 1); + } + + let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full; + + match self.full.next().peek() { + Full(bucket) => { + self.gap.raw = prev_raw; + self.gap.idx = prev_idx; + + self.full = bucket; + + Some(self) + } + Empty(..) => None + } + } +} + + +/// Rounds up to a multiple of a power of two. Returns the closest multiple +/// of `target_alignment` that is higher or equal to `unrounded`. +/// +/// # Failure +/// +/// Fails if `target_alignment` is not a power of two. 
+fn round_up_to_next(unrounded: uint, target_alignment: uint) -> uint { + assert!(is_power_of_two(target_alignment)); + (unrounded + target_alignment - 1) & !(target_alignment - 1) +} + +#[test] +fn test_rounding() { + assert_eq!(round_up_to_next(0, 4), 0); + assert_eq!(round_up_to_next(1, 4), 4); + assert_eq!(round_up_to_next(2, 4), 4); + assert_eq!(round_up_to_next(3, 4), 4); + assert_eq!(round_up_to_next(4, 4), 4); + assert_eq!(round_up_to_next(5, 4), 8); +} + +// Returns a tuple of (key_offset, val_offset), +// from the start of a mallocated array. +fn calculate_offsets(hashes_size: uint, + keys_size: uint, keys_align: uint, + vals_align: uint) + -> (uint, uint) { + let keys_offset = round_up_to_next(hashes_size, keys_align); + let end_of_keys = keys_offset + keys_size; + + let vals_offset = round_up_to_next(end_of_keys, vals_align); + + (keys_offset, vals_offset) +} + +// Returns a tuple of (minimum required malloc alignment, hash_offset, +// array_size), from the start of a mallocated array. +fn calculate_allocation(hash_size: uint, hash_align: uint, + keys_size: uint, keys_align: uint, + vals_size: uint, vals_align: uint) + -> (uint, uint, uint) { + let hash_offset = 0; + let (_, vals_offset) = calculate_offsets(hash_size, + keys_size, keys_align, + vals_align); + let end_of_vals = vals_offset + vals_size; + + let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align)); + + (min_align, hash_offset, end_of_vals) +} + +#[test] +fn test_offset_calculation() { + assert_eq!(calculate_allocation(128, 8, 15, 1, 4, 4), (8, 0, 148)); + assert_eq!(calculate_allocation(3, 1, 2, 1, 1, 1), (1, 0, 6)); + assert_eq!(calculate_allocation(6, 2, 12, 4, 24, 8), (8, 0, 48)); + assert_eq!(calculate_offsets(128, 15, 1, 4), (128, 144)); + assert_eq!(calculate_offsets(3, 2, 1, 1), (3, 5)); + assert_eq!(calculate_offsets(6, 12, 4, 8), (8, 24)); +} + +impl RawTable { + /// Does not initialize the buckets. 
The caller should ensure they, + /// at the very least, set every hash to EMPTY_BUCKET. + unsafe fn new_uninitialized(capacity: uint) -> RawTable { + if capacity == 0 { + return RawTable { + size: 0, + capacity: 0, + hashes: 0 as *mut u64, + marker: marker::CovariantType, + }; + } + // No need for `checked_mul` before a more restrictive check performed + // later in this method. + let hashes_size = capacity * size_of::(); + let keys_size = capacity * size_of::< K >(); + let vals_size = capacity * size_of::< V >(); + + // Allocating hashmaps is a little tricky. We need to allocate three + // arrays, but since we know their sizes and alignments up front, + // we just allocate a single array, and then have the subarrays + // point into it. + // + // This is great in theory, but in practice getting the alignment + // right is a little subtle. Therefore, calculating offsets has been + // factored out into a different function. + let (malloc_alignment, hash_offset, size) = + calculate_allocation( + hashes_size, min_align_of::(), + keys_size, min_align_of::< K >(), + vals_size, min_align_of::< V >()); + + // One check for overflow that covers calculation and rounding of size. 
+ let size_of_bucket = size_of::().checked_add(&size_of::()).unwrap() + .checked_add(&size_of::()).unwrap(); + assert!(size >= capacity.checked_mul(&size_of_bucket) + .expect("capacity overflow"), + "capacity overflow"); + + let buffer = allocate(size, malloc_alignment); + if buffer.is_null() { ::alloc::oom() } + + let hashes = buffer.offset(hash_offset as int) as *mut u64; + + RawTable { + capacity: capacity, + size: 0, + hashes: hashes, + marker: marker::CovariantType, + } + } + + fn first_bucket_raw(&self) -> RawBucket { + let hashes_size = self.capacity * size_of::(); + let keys_size = self.capacity * size_of::(); + + let buffer = self.hashes as *mut u8; + let (keys_offset, vals_offset) = calculate_offsets(hashes_size, + keys_size, min_align_of::(), + min_align_of::()); + + unsafe { + RawBucket { + hash: self.hashes, + key: buffer.offset(keys_offset as int) as *mut K, + val: buffer.offset(vals_offset as int) as *mut V + } + } + } + + /// Creates a new raw table from a given capacity. All buckets are + /// initially empty. + #[allow(experimental)] + pub fn new(capacity: uint) -> RawTable { + unsafe { + let ret = RawTable::new_uninitialized(capacity); + zero_memory(ret.hashes, capacity); + ret + } + } + + /// The hashtable's capacity, similar to a vector's. + pub fn capacity(&self) -> uint { + self.capacity + } + + /// The number of elements ever `put` in the hashtable, minus the number + /// of elements ever `take`n. 
+ pub fn size(&self) -> uint { + self.size + } + + fn raw_buckets(&self) -> RawBuckets { + RawBuckets { + raw: self.first_bucket_raw(), + hashes_end: unsafe { + self.hashes.offset(self.capacity as int) + }, + marker: marker::ContravariantLifetime, + } + } + + pub fn iter(&self) -> Entries { + Entries { + iter: self.raw_buckets(), + elems_left: self.size(), + } + } + + pub fn iter_mut(&mut self) -> MutEntries { + MutEntries { + iter: self.raw_buckets(), + elems_left: self.size(), + } + } + + pub fn into_iter(self) -> MoveEntries { + let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); + // Replace the marker regardless of lifetime bounds on parameters. + MoveEntries { + iter: RawBuckets { + raw: raw, + hashes_end: hashes_end, + marker: marker::ContravariantLifetime, + }, + table: self, + } + } + + /// Returns an iterator that copies out each entry. Used while the table + /// is being dropped. + unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets { + let raw_bucket = self.first_bucket_raw(); + RevMoveBuckets { + raw: raw_bucket.offset(self.capacity as int), + hashes_end: raw_bucket.hash, + elems_left: self.size, + marker: marker::ContravariantLifetime, + } + } +} + +/// A raw iterator. The basis for some other iterators in this module. Although +/// this interface is safe, it's not used outside this module. +struct RawBuckets<'a, K, V> { + raw: RawBucket, + hashes_end: *mut u64, + marker: marker::ContravariantLifetime<'a>, +} + +impl<'a, K, V> Iterator> for RawBuckets<'a, K, V> { + fn next(&mut self) -> Option> { + while self.raw.hash != self.hashes_end { + unsafe { + // We are swapping out the pointer to a bucket and replacing + // it with the pointer to the next one. + let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); + if *prev.hash != EMPTY_BUCKET { + return Some(prev); + } + } + } + + None + } +} + +/// An iterator that moves out buckets in reverse order. 
It leaves the table +/// in an an inconsistent state and should only be used for dropping +/// the table's remaining entries. It's used in the implementation of Drop. +struct RevMoveBuckets<'a, K, V> { + raw: RawBucket, + hashes_end: *mut u64, + elems_left: uint, + marker: marker::ContravariantLifetime<'a>, +} + +impl<'a, K, V> Iterator<(K, V)> for RevMoveBuckets<'a, K, V> { + fn next(&mut self) -> Option<(K, V)> { + if self.elems_left == 0 { + return None; + } + + loop { + debug_assert!(self.raw.hash != self.hashes_end); + + unsafe { + self.raw = self.raw.offset(-1); + + if *self.raw.hash != EMPTY_BUCKET { + self.elems_left -= 1; + return Some(( + ptr::read(self.raw.key as *const K), + ptr::read(self.raw.val as *const V) + )); + } + } + } + } +} + +/// Iterator over shared references to entries in a table. +pub struct Entries<'a, K: 'a, V: 'a> { + iter: RawBuckets<'a, K, V>, + elems_left: uint, +} + +/// Iterator over mutable references to entries in a table. +pub struct MutEntries<'a, K: 'a, V: 'a> { + iter: RawBuckets<'a, K, V>, + elems_left: uint, +} + +/// Iterator over the entries in a table, consuming the table. 
+pub struct MoveEntries { + table: RawTable, + iter: RawBuckets<'static, K, V> +} + +impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.iter.next().map(|bucket| { + self.elems_left -= 1; + unsafe { + (&*bucket.key, + &*bucket.val) + } + }) + } + + fn size_hint(&self) -> (uint, Option) { + (self.elems_left, Some(self.elems_left)) + } +} + +impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.iter.next().map(|bucket| { + self.elems_left -= 1; + unsafe { + (&*bucket.key, + &mut *bucket.val) + } + }) + } + + fn size_hint(&self) -> (uint, Option) { + (self.elems_left, Some(self.elems_left)) + } +} + +impl Iterator<(SafeHash, K, V)> for MoveEntries { + fn next(&mut self) -> Option<(SafeHash, K, V)> { + self.iter.next().map(|bucket| { + self.table.size -= 1; + unsafe { + ( + SafeHash { + hash: *bucket.hash, + }, + ptr::read(bucket.key as *const K), + ptr::read(bucket.val as *const V) + ) + } + }) + } + + fn size_hint(&self) -> (uint, Option) { + let size = self.table.size(); + (size, Some(size)) + } +} + +impl Clone for RawTable { + fn clone(&self) -> RawTable { + unsafe { + let mut new_ht = RawTable::new_uninitialized(self.capacity()); + + { + let cap = self.capacity(); + let mut new_buckets = Bucket::first(&mut new_ht); + let mut buckets = Bucket::first(self); + while buckets.index() != cap { + match buckets.peek() { + Full(full) => { + let (h, k, v) = { + let (k, v) = full.read(); + (full.hash(), k.clone(), v.clone()) + }; + *new_buckets.raw.hash = h.inspect(); + ptr::write(new_buckets.raw.key, k); + ptr::write(new_buckets.raw.val, v); + } + Empty(..) 
=> { + *new_buckets.raw.hash = EMPTY_BUCKET; + } + } + new_buckets.next(); + buckets.next(); + } + }; + + new_ht.size = self.size(); + + new_ht + } + } +} + +#[unsafe_destructor] +impl Drop for RawTable { + fn drop(&mut self) { + if self.hashes.is_null() { + return; + } + // This is done in reverse because we've likely partially taken + // some elements out with `.into_iter()` from the front. + // Check if the size is 0, so we don't do a useless scan when + // dropping empty tables such as on resize. + // Also avoid double drop of elements that have been already moved out. + unsafe { + for _ in self.rev_move_buckets() {} + } + + let hashes_size = self.capacity * size_of::(); + let keys_size = self.capacity * size_of::(); + let vals_size = self.capacity * size_of::(); + let (align, _, size) = calculate_allocation(hashes_size, min_align_of::(), + keys_size, min_align_of::(), + vals_size, min_align_of::()); + + unsafe { + deallocate(self.hashes as *mut u8, size, align); + // Remember how everything was allocated out of one buffer + // during initialization? We only need one call to free here. + } + } +} diff --git a/src/libstd/collections/hashmap/bench.rs b/src/libstd/collections/hashmap/bench.rs deleted file mode 100644 index 21bbb38f489..00000000000 --- a/src/libstd/collections/hashmap/bench.rs +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -#![cfg(test)] - -extern crate test; -use prelude::*; - -use self::test::Bencher; -use iter::{range_inclusive}; - -#[bench] -fn new_drop(b : &mut Bencher) { - use super::HashMap; - - b.iter(|| { - let m : HashMap = HashMap::new(); - assert_eq!(m.len(), 0); - }) -} - -#[bench] -fn new_insert_drop(b : &mut Bencher) { - use super::HashMap; - - b.iter(|| { - let mut m = HashMap::new(); - m.insert(0i, 0i); - assert_eq!(m.len(), 1); - }) -} - -#[bench] -fn grow_by_insertion(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - let mut k = 1001; - - b.iter(|| { - m.insert(k, k); - k += 1; - }); -} - -#[bench] -fn find_existing(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - b.iter(|| { - for i in range_inclusive(1i, 1000) { - m.contains_key(&i); - } - }); -} - -#[bench] -fn find_nonexisting(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - b.iter(|| { - for i in range_inclusive(1001i, 2000) { - m.contains_key(&i); - } - }); -} - -#[bench] -fn hashmap_as_queue(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - let mut k = 1i; - - b.iter(|| { - m.pop(&k); - m.insert(k + 1000, k + 1000); - k += 1; - }); -} - -#[bench] -fn find_pop_insert(b: &mut Bencher) { - use super::HashMap; - - let mut m = HashMap::new(); - - for i in range_inclusive(1i, 1000) { - m.insert(i, i); - } - - let mut k = 1i; - - b.iter(|| { - m.find(&(k + 400)); - m.find(&(k + 2000)); - m.pop(&k); - m.insert(k + 1000, k + 1000); - k += 1; - }) -} diff --git a/src/libstd/collections/hashmap/map.rs b/src/libstd/collections/hashmap/map.rs deleted file mode 100644 index 596e483c2f6..00000000000 --- a/src/libstd/collections/hashmap/map.rs +++ /dev/null @@ 
-1,2133 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. -// -// ignore-lexer-test FIXME #15883 - -use clone::Clone; -use cmp::{max, Eq, Equiv, PartialEq}; -use default::Default; -use fmt::{mod, Show}; -use hash::{Hash, Hasher, RandomSipHasher}; -use iter::{mod, Iterator, FromIterator, Extendable}; -use kinds::Sized; -use mem::{mod, replace}; -use num; -use ops::{Deref, Index, IndexMut}; -use option::{Some, None, Option}; -use result::{Result, Ok, Err}; - -use super::table; -use super::table::{ - Bucket, - Empty, - EmptyBucket, - Full, - FullBucket, - FullBucketImm, - FullBucketMut, - RawTable, - SafeHash -}; - -const INITIAL_LOG2_CAP: uint = 5; -pub const INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5 - -/// The default behavior of HashMap implements a load factor of 90.9%. -/// This behavior is characterized by the following conditions: -/// -/// - if size > 0.909 * capacity: grow -/// - if size < 0.25 * capacity: shrink (if this won't bring capacity lower -/// than the minimum) -#[deriving(Clone)] -struct DefaultResizePolicy { - /// Doubled minimal capacity. The capacity must never drop below - /// the minimum capacity. (The check happens before the capacity - /// is potentially halved.) 
- minimum_capacity2: uint -} - -impl DefaultResizePolicy { - fn new(new_capacity: uint) -> DefaultResizePolicy { - DefaultResizePolicy { - minimum_capacity2: new_capacity << 1 - } - } - - #[inline] - fn capacity_range(&self, new_size: uint) -> (uint, uint) { - // Here, we are rephrasing the logic by specifying the ranges: - // - // - if `size * 1.1 < cap < size * 4`: don't resize - // - if `cap < minimum_capacity * 2`: don't shrink - // - otherwise, resize accordingly - ((new_size * 11) / 10, max(new_size << 2, self.minimum_capacity2)) - } - - #[inline] - fn reserve(&mut self, new_capacity: uint) { - self.minimum_capacity2 = new_capacity << 1; - } -} - -// The main performance trick in this hashmap is called Robin Hood Hashing. -// It gains its excellent performance from one essential operation: -// -// If an insertion collides with an existing element, and that element's -// "probe distance" (how far away the element is from its ideal location) -// is higher than how far we've already probed, swap the elements. -// -// This massively lowers variance in probe distance, and allows us to get very -// high load factors with good performance. The 90% load factor I use is rather -// conservative. -// -// > Why a load factor of approximately 90%? -// -// In general, all the distances to initial buckets will converge on the mean. -// At a load factor of α, the odds of finding the target bucket after k -// probes is approximately 1-α^k. If we set this equal to 50% (since we converge -// on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round -// this down to make the math easier on the CPU and avoid its FPU. -// Since on average we start the probing in the middle of a cache line, this -// strategy pulls in two cache lines of hashes on every lookup. I think that's -// pretty good, but if you want to trade off some space, it could go down to one -// cache line on average with an α of 0.84. -// -// > Wait, what? Where did you get 1-α^k from? 
-// -// On the first probe, your odds of a collision with an existing element is α. -// The odds of doing this twice in a row is approximately α^2. For three times, -// α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT -// colliding after k tries is 1-α^k. -// -// The paper from 1986 cited below mentions an implementation which keeps track -// of the distance-to-initial-bucket histogram. This approach is not suitable -// for modern architectures because it requires maintaining an internal data -// structure. This allows very good first guesses, but we are most concerned -// with guessing entire cache lines, not individual indexes. Furthermore, array -// accesses are no longer linear and in one direction, as we have now. There -// is also memory and cache pressure that this would entail that would be very -// difficult to properly see in a microbenchmark. -// -// ## Future Improvements (FIXME!) -// -// Allow the load factor to be changed dynamically and/or at initialization. -// -// Also, would it be possible for us to reuse storage when growing the -// underlying table? This is exactly the use case for 'realloc', and may -// be worth exploring. -// -// ## Future Optimizations (FIXME!) -// -// Another possible design choice that I made without any real reason is -// parameterizing the raw table over keys and values. Technically, all we need -// is the size and alignment of keys and values, and the code should be just as -// efficient (well, we might need one for power-of-two size and one for not...). -// This has the potential to reduce code bloat in rust executables, without -// really losing anything except 4 words (key size, key alignment, val size, -// val alignment) which can be passed in to every call of a `RawTable` function. -// This would definitely be an avenue worth exploring if people start complaining -// about the size of rust executables. 
-// -// Annotate exceedingly likely branches in `table::make_hash` -// and `search_hashed_generic` to reduce instruction cache pressure -// and mispredictions once it becomes possible (blocked on issue #11092). -// -// Shrinking the table could simply reallocate in place after moving buckets -// to the first half. -// -// The growth algorithm (fragment of the Proof of Correctness) -// -------------------- -// -// The growth algorithm is basically a fast path of the naive reinsertion- -// during-resize algorithm. Other paths should never be taken. -// -// Consider growing a robin hood hashtable of capacity n. Normally, we do this -// by allocating a new table of capacity `2n`, and then individually reinsert -// each element in the old table into the new one. This guarantees that the -// new table is a valid robin hood hashtable with all the desired statistical -// properties. Remark that the order we reinsert the elements in should not -// matter. For simplicity and efficiency, we will consider only linear -// reinsertions, which consist of reinserting all elements in the old table -// into the new one by increasing order of index. However we will not be -// starting our reinsertions from index 0 in general. If we start from index -// i, for the purpose of reinsertion we will consider all elements with real -// index j < i to have virtual index n + j. -// -// Our hash generation scheme consists of generating a 64-bit hash and -// truncating the most significant bits. When moving to the new table, we -// simply introduce a new bit to the front of the hash. Therefore, if an -// elements has ideal index i in the old table, it can have one of two ideal -// locations in the new table. If the new bit is 0, then the new ideal index -// is i. If the new bit is 1, then the new ideal index is n + i. Intutively, -// we are producing two independent tables of size n, and for each element we -// independently choose which table to insert it into with equal probability. 
-// However the rather than wrapping around themselves on overflowing their -// indexes, the first table overflows into the first, and the first into the -// second. Visually, our new table will look something like: -// -// [yy_xxx_xxxx_xxx|xx_yyy_yyyy_yyy] -// -// Where x's are elements inserted into the first table, y's are elements -// inserted into the second, and _'s are empty sections. We now define a few -// key concepts that we will use later. Note that this is a very abstract -// perspective of the table. A real resized table would be at least half -// empty. -// -// Theorem: A linear robin hood reinsertion from the first ideal element -// produces identical results to a linear naive reinsertion from the same -// element. -// -// FIXME(Gankro, pczarn): review the proof and put it all in a separate doc.rs - -/// A hash map implementation which uses linear probing with Robin -/// Hood bucket stealing. -/// -/// The hashes are all keyed by the task-local random number generator -/// on creation by default. This means that the ordering of the keys is -/// randomized, but makes the tables more resistant to -/// denial-of-service attacks (Hash DoS). This behaviour can be -/// overridden with one of the constructors. -/// -/// It is required that the keys implement the `Eq` and `Hash` traits, although -/// this can frequently be achieved by using `#[deriving(Eq, Hash)]`. -/// -/// Relevant papers/articles: -/// -/// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf) -/// 2. Emmanuel Goossaert. ["Robin Hood -/// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/) -/// 3. Emmanuel Goossaert. 
["Robin Hood hashing: backward shift -/// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/) -/// -/// # Example -/// -/// ``` -/// use std::collections::HashMap; -/// -/// // type inference lets us omit an explicit type signature (which -/// // would be `HashMap<&str, &str>` in this example). -/// let mut book_reviews = HashMap::new(); -/// -/// // review some books. -/// book_reviews.insert("Adventures of Huckleberry Finn", "My favorite book."); -/// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece."); -/// book_reviews.insert("Pride and Prejudice", "Very enjoyable."); -/// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot."); -/// -/// // check for a specific one. -/// if !book_reviews.contains_key(&("Les Misérables")) { -/// println!("We've got {} reviews, but Les Misérables ain't one.", -/// book_reviews.len()); -/// } -/// -/// // oops, this review has a lot of spelling mistakes, let's delete it. -/// book_reviews.remove(&("The Adventures of Sherlock Holmes")); -/// -/// // look up the values associated with some keys. -/// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"]; -/// for book in to_find.iter() { -/// match book_reviews.find(book) { -/// Some(review) => println!("{}: {}", *book, *review), -/// None => println!("{} is unreviewed.", *book) -/// } -/// } -/// -/// // iterate over everything. -/// for (book, review) in book_reviews.iter() { -/// println!("{}: \"{}\"", *book, *review); -/// } -/// ``` -/// -/// The easiest way to use `HashMap` with a custom type is to derive `Eq` and `Hash`. -/// We must also derive `PartialEq`. 
-/// -/// ``` -/// use std::collections::HashMap; -/// -/// #[deriving(Hash, Eq, PartialEq, Show)] -/// struct Viking<'a> { -/// name: &'a str, -/// power: uint, -/// } -/// -/// let mut vikings = HashMap::new(); -/// -/// vikings.insert("Norway", Viking { name: "Einar", power: 9u }); -/// vikings.insert("Denmark", Viking { name: "Olaf", power: 4u }); -/// vikings.insert("Iceland", Viking { name: "Harald", power: 8u }); -/// -/// // Use derived implementation to print the vikings. -/// for (land, viking) in vikings.iter() { -/// println!("{} at {}", viking, land); -/// } -/// ``` -#[deriving(Clone)] -pub struct HashMap { - // All hashes are keyed on these values, to prevent hash collision attacks. - hasher: H, - - table: RawTable, - - // We keep this at the end since it might as well have tail padding. - resize_policy: DefaultResizePolicy, -} - -/// Search for a pre-hashed key. -fn search_hashed_generic>>(table: M, - hash: &SafeHash, - is_match: |&K| -> bool) - -> SearchResult { - let size = table.size(); - let mut probe = Bucket::new(table, hash); - let ib = probe.index(); - - while probe.index() != ib + size { - let full = match probe.peek() { - Empty(b) => return TableRef(b.into_table()), // hit an empty bucket - Full(b) => b - }; - - if full.distance() + ib < full.index() { - // We can finish the search early if we hit any bucket - // with a lower distance to initial bucket than we've probed. - return TableRef(full.into_table()); - } - - // If the hash doesn't match, it can't be this one.. - if *hash == full.hash() { - let matched = { - let (k, _) = full.read(); - is_match(k) - }; - - // If the key doesn't match, it can't be this one.. 
- if matched { - return FoundExisting(full); - } - } - - probe = full.next(); - } - - TableRef(probe.into_table()) -} - -fn search_hashed>>(table: M, hash: &SafeHash, k: &K) - -> SearchResult { - search_hashed_generic(table, hash, |k_| *k == *k_) -} - -fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { - let (empty, retkey, retval) = starting_bucket.take(); - let mut gap = match empty.gap_peek() { - Some(b) => b, - None => return (retkey, retval) - }; - - while gap.full().distance() != 0 { - gap = match gap.shift() { - Some(b) => b, - None => break - }; - } - - // Now we've done all our shifting. Return the value we grabbed earlier. - return (retkey, retval); -} - -/// Perform robin hood bucket stealing at the given `bucket`. You must -/// also pass the position of that bucket's initial bucket so we don't have -/// to recalculate it. -/// -/// `hash`, `k`, and `v` are the elements to "robin hood" into the hashtable. -fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>, - mut ib: uint, - mut hash: SafeHash, - mut k: K, - mut v: V) - -> &'a mut V { - let starting_index = bucket.index(); - let size = { - let table = bucket.table(); // FIXME "lifetime too short". - table.size() - }; - // There can be at most `size - dib` buckets to displace, because - // in the worst case, there are `size` elements and we already are - // `distance` buckets away from the initial one. - let idx_end = starting_index + size - bucket.distance(); - - loop { - let (old_hash, old_key, old_val) = bucket.replace(hash, k, v); - loop { - let probe = bucket.next(); - assert!(probe.index() != idx_end); - - let full_bucket = match probe.peek() { - table::Empty(bucket) => { - // Found a hole! - let b = bucket.put(old_hash, old_key, old_val); - // Now that it's stolen, just read the value's pointer - // right out of the table! 
- let (_, v) = Bucket::at_index(b.into_table(), starting_index).peek() - .expect_full() - .into_mut_refs(); - return v; - }, - table::Full(bucket) => bucket - }; - - let probe_ib = full_bucket.index() - full_bucket.distance(); - - bucket = full_bucket; - - // Robin hood! Steal the spot. - if ib < probe_ib { - ib = probe_ib; - hash = old_hash; - k = old_key; - v = old_val; - break; - } - } - } -} - -/// A result that works like Option> but preserves -/// the reference that grants us access to the table in any case. -enum SearchResult { - // This is an entry that holds the given key: - FoundExisting(FullBucket), - - // There was no such entry. The reference is given back: - TableRef(M) -} - -impl SearchResult { - fn into_option(self) -> Option> { - match self { - FoundExisting(bucket) => Some(bucket), - TableRef(_) => None - } - } -} - -impl, V, S, H: Hasher> HashMap { - fn make_hash>(&self, x: &X) -> SafeHash { - table::make_hash(&self.hasher, x) - } - - fn search_equiv<'a, Sized? Q: Hash + Equiv>(&'a self, q: &Q) - -> Option> { - let hash = self.make_hash(q); - search_hashed_generic(&self.table, &hash, |k| q.equiv(k)).into_option() - } - - fn search_equiv_mut<'a, Sized? Q: Hash + Equiv>(&'a mut self, q: &Q) - -> Option> { - let hash = self.make_hash(q); - search_hashed_generic(&mut self.table, &hash, |k| q.equiv(k)).into_option() - } - - /// Search for a key, yielding the index if it's found in the hashtable. - /// If you already have the hash for the key lying around, use - /// search_hashed. - fn search<'a>(&'a self, k: &K) -> Option> { - let hash = self.make_hash(k); - search_hashed(&self.table, &hash, k).into_option() - } - - fn search_mut<'a>(&'a mut self, k: &K) -> Option> { - let hash = self.make_hash(k); - search_hashed(&mut self.table, &hash, k).into_option() - } - - // The caller should ensure that invariants by Robin Hood Hashing hold. 
- fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { - let cap = self.table.capacity(); - let mut buckets = Bucket::new(&mut self.table, &hash); - let ib = buckets.index(); - - while buckets.index() != ib + cap { - // We don't need to compare hashes for value swap. - // Not even DIBs for Robin Hood. - buckets = match buckets.peek() { - Empty(empty) => { - empty.put(hash, k, v); - return; - } - Full(b) => b.into_bucket() - }; - buckets.next(); - } - panic!("Internal HashMap error: Out of space."); - } -} - -impl HashMap { - /// Create an empty HashMap. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// let mut map: HashMap<&str, int> = HashMap::with_capacity(10); - /// ``` - #[inline] - pub fn new() -> HashMap { - let hasher = RandomSipHasher::new(); - HashMap::with_hasher(hasher) - } - - /// Creates an empty hash map with the given initial capacity. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// let mut map: HashMap<&str, int> = HashMap::with_capacity(10); - /// ``` - #[inline] - pub fn with_capacity(capacity: uint) -> HashMap { - let hasher = RandomSipHasher::new(); - HashMap::with_capacity_and_hasher(capacity, hasher) - } -} - -impl, V, S, H: Hasher> HashMap { - /// Creates an empty hashmap which will use the given hasher to hash keys. - /// - /// The creates map has the default initial capacity. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// use std::hash::sip::SipHasher; - /// - /// let h = SipHasher::new(); - /// let mut map = HashMap::with_hasher(h); - /// map.insert(1i, 2u); - /// ``` - #[inline] - pub fn with_hasher(hasher: H) -> HashMap { - HashMap { - hasher: hasher, - resize_policy: DefaultResizePolicy::new(INITIAL_CAPACITY), - table: RawTable::new(0), - } - } - - /// Create an empty HashMap with space for at least `capacity` - /// elements, using `hasher` to hash the keys. 
- /// - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow HashMaps to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// use std::hash::sip::SipHasher; - /// - /// let h = SipHasher::new(); - /// let mut map = HashMap::with_capacity_and_hasher(10, h); - /// map.insert(1i, 2u); - /// ``` - #[inline] - pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap { - let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity)); - HashMap { - hasher: hasher, - resize_policy: DefaultResizePolicy::new(cap), - table: RawTable::new(cap), - } - } - - /// The hashtable will never try to shrink below this size. You can use - /// this function to reduce reallocations if your hashtable frequently - /// grows and shrinks by large amounts. - /// - /// This function has no effect on the operational semantics of the - /// hashtable, only on performance. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// let mut map: HashMap<&str, int> = HashMap::new(); - /// map.reserve(10); - /// ``` - pub fn reserve(&mut self, new_minimum_capacity: uint) { - let cap = num::next_power_of_two( - max(INITIAL_CAPACITY, new_minimum_capacity)); - - self.resize_policy.reserve(cap); - - if self.table.capacity() < cap { - self.resize(cap); - } - } - - /// Resizes the internal vectors to a new capacity. It's your responsibility to: - /// 1) Make sure the new capacity is enough for all the elements, accounting - /// for the load factor. - /// 2) Ensure new_capacity is a power of two. 
- fn resize(&mut self, new_capacity: uint) { - assert!(self.table.size() <= new_capacity); - assert!(num::is_power_of_two(new_capacity)); - - let mut old_table = replace(&mut self.table, RawTable::new(new_capacity)); - let old_size = old_table.size(); - - if old_table.capacity() == 0 || old_table.size() == 0 { - return; - } - - if new_capacity < old_table.capacity() { - // Shrink the table. Naive algorithm for resizing: - for (h, k, v) in old_table.into_iter() { - self.insert_hashed_nocheck(h, k, v); - } - } else { - // Grow the table. - // Specialization of the other branch. - let mut bucket = Bucket::first(&mut old_table); - - // "So a few of the first shall be last: for many be called, - // but few chosen." - // - // We'll most likely encounter a few buckets at the beginning that - // have their initial buckets near the end of the table. They were - // placed at the beginning as the probe wrapped around the table - // during insertion. We must skip forward to a bucket that won't - // get reinserted too early and won't unfairly steal others spot. - // This eliminates the need for robin hood. - loop { - bucket = match bucket.peek() { - Full(full) => { - if full.distance() == 0 { - // This bucket occupies its ideal spot. - // It indicates the start of another "cluster". - bucket = full.into_bucket(); - break; - } - // Leaving this bucket in the last cluster for later. - full.into_bucket() - } - Empty(b) => { - // Encountered a hole between clusters. 
- b.into_bucket() - } - }; - bucket.next(); - } - - // This is how the buckets might be laid out in memory: - // ($ marks an initialized bucket) - // ________________ - // |$$$_$$$$$$_$$$$$| - // - // But we've skipped the entire initial cluster of buckets - // and will continue iteration in this order: - // ________________ - // |$$$$$$_$$$$$ - // ^ wrap around once end is reached - // ________________ - // $$$_____________| - // ^ exit once table.size == 0 - loop { - bucket = match bucket.peek() { - Full(bucket) => { - let h = bucket.hash(); - let (b, k, v) = bucket.take(); - self.insert_hashed_ordered(h, k, v); - { - let t = b.table(); // FIXME "lifetime too short". - if t.size() == 0 { break } - }; - b.into_bucket() - } - Empty(b) => b.into_bucket() - }; - bucket.next(); - } - } - - assert_eq!(self.table.size(), old_size); - } - - /// Performs any necessary resize operations, such that there's space for - /// new_size elements. - fn make_some_room(&mut self, new_size: uint) { - let (grow_at, shrink_at) = self.resize_policy.capacity_range(new_size); - let cap = self.table.capacity(); - - // An invalid value shouldn't make us run out of space. - debug_assert!(grow_at >= new_size); - - if cap <= grow_at { - let new_capacity = max(cap << 1, INITIAL_CAPACITY); - self.resize(new_capacity); - } else if shrink_at <= cap { - let new_capacity = cap >> 1; - self.resize(new_capacity); - } - } - - /// Insert a pre-hashed key-value pair, without first checking - /// that there's enough room in the buckets. Returns a reference to the - /// newly insert value. - /// - /// If the key already exists, the hashtable will be returned untouched - /// and a reference to the existing element will be returned. 
- fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> &mut V { - self.insert_or_replace_with(hash, k, v, |_, _, _| ()) - } - - fn insert_or_replace_with<'a>(&'a mut self, - hash: SafeHash, - k: K, - v: V, - found_existing: |&mut K, &mut V, V|) - -> &'a mut V { - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = self.table.size(); - let mut probe = Bucket::new(&mut self.table, &hash); - let ib = probe.index(); - - loop { - let mut bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - let bucket = bucket.put(hash, k, v); - let (_, val) = bucket.into_mut_refs(); - return val; - }, - Full(bucket) => bucket - }; - - if bucket.hash() == hash { - let found_match = { - let (bucket_k, _) = bucket.read_mut(); - k == *bucket_k - }; - if found_match { - let (bucket_k, bucket_v) = bucket.into_mut_refs(); - debug_assert!(k == *bucket_k); - // Key already exists. Get its reference. - found_existing(bucket_k, bucket_v, v); - return bucket_v; - } - } - - let robin_ib = bucket.index() as int - bucket.distance() as int; - - if (ib as int) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. - return robin_hood(bucket, robin_ib as uint, hash, k, v); - } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); - } - } - - /// Retrieves a mutable value for the given key. - /// See [`find_mut`](../trait.MutableMap.html#tymethod.find_mut) for a non-panicking - /// alternative. - /// - /// # Failure - /// - /// Fails if the key is not present. 
- /// - /// # Example - /// - /// ``` - /// # #![allow(deprecated)] - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert("a", 1i); - /// { - /// // val will freeze map to prevent usage during its lifetime - /// let val = map.get_mut(&"a"); - /// *val = 40; - /// } - /// assert_eq!(map["a"], 40); - /// - /// // A more direct way could be: - /// *map.get_mut(&"a") = -2; - /// assert_eq!(map["a"], -2); - /// ``` - #[deprecated = "use indexing instead: `&mut map[key]`"] - pub fn get_mut<'a>(&'a mut self, k: &K) -> &'a mut V { - &mut self[*k] - } - - /// Return true if the map contains a value for the specified key, - /// using equivalence. - /// - /// See [pop_equiv](#method.pop_equiv) for an extended example. - pub fn contains_key_equiv + Equiv>(&self, key: &Q) -> bool { - self.search_equiv(key).is_some() - } - - /// Return the value corresponding to the key in the map, using - /// equivalence. - /// - /// See [pop_equiv](#method.pop_equiv) for an extended example. - pub fn find_equiv<'a, Sized? Q: Hash + Equiv>(&'a self, k: &Q) -> Option<&'a V> { - match self.search_equiv(k) { - None => None, - Some(bucket) => { - let (_, v_ref) = bucket.into_refs(); - Some(v_ref) - } - } - } - - /// Remove an equivalent key from the map, returning the value at the - /// key if the key was previously in the map. - /// - /// # Example - /// - /// This is a slightly silly example where we define the number's - /// parity as the equivalence class. It is important that the - /// values hash the same, which is why we implement `Hash`. 
- /// - /// ``` - /// use std::collections::HashMap; - /// use std::hash::Hash; - /// use std::hash::sip::SipState; - /// - /// #[deriving(Eq, PartialEq)] - /// struct EvenOrOdd { - /// num: uint - /// }; - /// - /// impl Hash for EvenOrOdd { - /// fn hash(&self, state: &mut SipState) { - /// let parity = self.num % 2; - /// parity.hash(state); - /// } - /// } - /// - /// impl Equiv for EvenOrOdd { - /// fn equiv(&self, other: &EvenOrOdd) -> bool { - /// self.num % 2 == other.num % 2 - /// } - /// } - /// - /// let mut map = HashMap::new(); - /// map.insert(EvenOrOdd { num: 3 }, "foo"); - /// - /// assert!(map.contains_key_equiv(&EvenOrOdd { num: 1 })); - /// assert!(!map.contains_key_equiv(&EvenOrOdd { num: 4 })); - /// - /// assert_eq!(map.find_equiv(&EvenOrOdd { num: 5 }), Some(&"foo")); - /// assert_eq!(map.find_equiv(&EvenOrOdd { num: 2 }), None); - /// - /// assert_eq!(map.pop_equiv(&EvenOrOdd { num: 1 }), Some("foo")); - /// assert_eq!(map.pop_equiv(&EvenOrOdd { num: 2 }), None); - /// - /// ``` - #[experimental] - pub fn pop_equiv + Equiv>(&mut self, k: &Q) -> Option { - if self.table.size() == 0 { - return None - } - - let potential_new_size = self.table.size() - 1; - self.make_some_room(potential_new_size); - - match self.search_equiv_mut(k) { - Some(bucket) => { - let (_k, val) = pop_internal(bucket); - Some(val) - } - _ => None - } - } - - /// An iterator visiting all keys in arbitrary order. - /// Iterator element type is `&'a K`. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert("a", 1i); - /// map.insert("b", 2); - /// map.insert("c", 3); - /// - /// for key in map.keys() { - /// println!("{}", key); - /// } - /// ``` - pub fn keys(&self) -> Keys { - self.iter().map(|(k, _v)| k) - } - - /// An iterator visiting all values in arbitrary order. - /// Iterator element type is `&'a V`. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert("a", 1i); - /// map.insert("b", 2); - /// map.insert("c", 3); - /// - /// for key in map.values() { - /// println!("{}", key); - /// } - /// ``` - pub fn values(&self) -> Values { - self.iter().map(|(_k, v)| v) - } - - /// An iterator visiting all key-value pairs in arbitrary order. - /// Iterator element type is `(&'a K, &'a V)`. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert("a", 1i); - /// map.insert("b", 2); - /// map.insert("c", 3); - /// - /// for (key, val) in map.iter() { - /// println!("key: {} val: {}", key, val); - /// } - /// ``` - pub fn iter(&self) -> Entries { - Entries { inner: self.table.iter() } - } - - /// An iterator visiting all key-value pairs in arbitrary order, - /// with mutable references to the values. - /// Iterator element type is `(&'a K, &'a mut V)`. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert("a", 1i); - /// map.insert("b", 2); - /// map.insert("c", 3); - /// - /// // Update all values - /// for (_, val) in map.iter_mut() { - /// *val *= 2; - /// } - /// - /// for (key, val) in map.iter() { - /// println!("key: {} val: {}", key, val); - /// } - /// ``` - pub fn iter_mut(&mut self) -> MutEntries { - MutEntries { inner: self.table.iter_mut() } - } - - /// Creates a consuming iterator, that is, one that moves each key-value - /// pair out of the map in arbitrary order. The map cannot be used after - /// calling this. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert("a", 1i); - /// map.insert("b", 2); - /// map.insert("c", 3); - /// - /// // Not possible with .iter() - /// let vec: Vec<(&str, int)> = map.into_iter().collect(); - /// ``` - pub fn into_iter(self) -> MoveEntries { - MoveEntries { - inner: self.table.into_iter().map(|(_, k, v)| (k, v)) - } - } - - /// Gets the given key's corresponding entry in the map for in-place manipulation - pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> { - // Gotta resize now, and we don't know which direction, so try both? - let size = self.table.size(); - self.make_some_room(size + 1); - if size > 0 { - self.make_some_room(size - 1); - } - - let hash = self.make_hash(&key); - search_entry_hashed(&mut self.table, hash, key) - } - - /// Return the number of elements in the map. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut a = HashMap::new(); - /// assert_eq!(a.len(), 0); - /// a.insert(1u, "a"); - /// assert_eq!(a.len(), 1); - /// ``` - pub fn len(&self) -> uint { self.table.size() } - - /// Return true if the map contains no elements. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut a = HashMap::new(); - /// assert!(a.is_empty()); - /// a.insert(1u, "a"); - /// assert!(!a.is_empty()); - /// ``` - #[inline] - pub fn is_empty(&self) -> bool { self.len() == 0 } - - /// Clears the map, removing all key-value pairs. Keeps the allocated memory - /// for reuse. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut a = HashMap::new(); - /// a.insert(1u, "a"); - /// a.clear(); - /// assert!(a.is_empty()); - /// ``` - pub fn clear(&mut self) { - // Prevent reallocations from happening from now on. Makes it possible - // for the map to be reused but has a downside: reserves permanently. 
- self.resize_policy.reserve(self.table.size()); - - let cap = self.table.capacity(); - let mut buckets = Bucket::first(&mut self.table); - - while buckets.index() != cap { - buckets = match buckets.peek() { - Empty(b) => b.next(), - Full(full) => { - let (b, _, _) = full.take(); - b.next() - } - }; - } - } - - /// Returns a reference to the value corresponding to the key. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert(1u, "a"); - /// assert_eq!(map.find(&1), Some(&"a")); - /// assert_eq!(map.find(&2), None); - /// ``` - pub fn find<'a>(&'a self, k: &K) -> Option<&'a V> { - self.search(k).map(|bucket| { - let (_, v) = bucket.into_refs(); - v - }) - } - - /// Returns true if the map contains a value for the specified key. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert(1u, "a"); - /// assert_eq!(map.contains_key(&1), true); - /// assert_eq!(map.contains_key(&2), false); - /// ``` - pub fn contains_key(&self, k: &K) -> bool { - self.search(k).is_some() - } - - /// Returns a mutable reference to the value corresponding to the key. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert(1u, "a"); - /// match map.find_mut(&1) { - /// Some(x) => *x = "b", - /// None => (), - /// } - /// assert_eq!(map[1], "b"); - /// ``` - pub fn find_mut<'a>(&'a mut self, k: &K) -> Option<&'a mut V> { - match self.search_mut(k) { - Some(bucket) => { - let (_, v) = bucket.into_mut_refs(); - Some(v) - } - _ => None - } - } - - /// Inserts a key-value pair into the map. An existing value for a - /// key is replaced by the new value. Returns `true` if the key did - /// not already exist in the map. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// assert_eq!(map.insert(2u, "value"), true); - /// assert_eq!(map.insert(2, "value2"), false); - /// assert_eq!(map[2], "value2"); - /// ``` - #[inline] - pub fn insert(&mut self, key: K, value: V) -> bool { - self.swap(key, value).is_none() - } - - /// Removes a key-value pair from the map. Returns `true` if the key - /// was present in the map. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// assert_eq!(map.remove(&1u), false); - /// map.insert(1, "a"); - /// assert_eq!(map.remove(&1), true); - /// ``` - #[inline] - pub fn remove(&mut self, key: &K) -> bool { - self.pop(key).is_some() - } - - /// Inserts a key-value pair from the map. If the key already had a value - /// present in the map, that value is returned. Otherwise, `None` is returned. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// assert_eq!(map.swap(37u, "a"), None); - /// assert_eq!(map.is_empty(), false); - /// - /// map.insert(37, "b"); - /// assert_eq!(map.swap(37, "c"), Some("b")); - /// assert_eq!(map[37], "c"); - /// ``` - pub fn swap(&mut self, k: K, v: V) -> Option { - let hash = self.make_hash(&k); - let potential_new_size = self.table.size() + 1; - self.make_some_room(potential_new_size); - - let mut retval = None; - self.insert_or_replace_with(hash, k, v, |_, val_ref, val| { - retval = Some(replace(val_ref, val)); - }); - retval - } - - /// Removes a key from the map, returning the value at the key if the key - /// was previously in the map. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.insert(1u, "a"); - /// assert_eq!(map.pop(&1), Some("a")); - /// assert_eq!(map.pop(&1), None); - /// ``` - pub fn pop(&mut self, k: &K) -> Option { - if self.table.size() == 0 { - return None - } - - let potential_new_size = self.table.size() - 1; - self.make_some_room(potential_new_size); - - self.search_mut(k).map(|bucket| { - let (_k, val) = pop_internal(bucket); - val - }) - } -} - -fn search_entry_hashed<'a, K: Eq, V>(table: &'a mut RawTable, hash: SafeHash, k: K) - -> Entry<'a, K, V> { - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = table.size(); - let mut probe = Bucket::new(table, &hash); - let ib = probe.index(); - - loop { - let bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return Vacant(VacantEntry { - hash: hash, - key: k, - elem: NoElem(bucket), - }); - }, - Full(bucket) => bucket - }; - - if bucket.hash() == hash { - let is_eq = { - let (bucket_k, _) = bucket.read(); - k == *bucket_k - }; - - if is_eq { - return Occupied(OccupiedEntry{ - elem: bucket, - }); - } - } - - let robin_ib = bucket.index() as int - bucket.distance() as int; - - if (ib as int) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. - return Vacant(VacantEntry { - hash: hash, - key: k, - elem: NeqElem(bucket, robin_ib as uint), - }); - } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); - } -} - -impl, V: Clone, S, H: Hasher> HashMap { - /// Return a copy of the value corresponding to the key. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map: HashMap = HashMap::new(); - /// map.insert(1u, "foo".to_string()); - /// let s: String = map.find_copy(&1).unwrap(); - /// ``` - pub fn find_copy(&self, k: &K) -> Option { - self.find(k).map(|v| (*v).clone()) - } - - /// Return a copy of the value corresponding to the key. - /// - /// # Failure - /// - /// Fails if the key is not present. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashMap; - /// - /// let mut map: HashMap = HashMap::new(); - /// map.insert(1u, "foo".to_string()); - /// let s: String = map.get_copy(&1); - /// ``` - pub fn get_copy(&self, k: &K) -> V { - self[*k].clone() - } -} - -impl, V: PartialEq, S, H: Hasher> PartialEq for HashMap { - fn eq(&self, other: &HashMap) -> bool { - if self.len() != other.len() { return false; } - - self.iter().all(|(key, value)| - other.find(key).map_or(false, |v| *value == *v) - ) - } -} - -impl, V: Eq, S, H: Hasher> Eq for HashMap {} - -impl + Show, V: Show, S, H: Hasher> Show for HashMap { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(write!(f, "{{")); - - for (i, (k, v)) in self.iter().enumerate() { - if i != 0 { try!(write!(f, ", ")); } - try!(write!(f, "{}: {}", *k, *v)); - } - - write!(f, "}}") - } -} - -impl, V, S, H: Hasher + Default> Default for HashMap { - fn default() -> HashMap { - HashMap::with_hasher(Default::default()) - } -} - -impl, V, S, H: Hasher> Index for HashMap { - #[inline] - fn index<'a>(&'a self, index: &K) -> &'a V { - self.find(index).expect("no entry found for key") - } -} - -impl, V, S, H: Hasher> IndexMut for HashMap { - #[inline] - fn index_mut<'a>(&'a mut self, index: &K) -> &'a mut V { - match self.find_mut(index) { - Some(v) => v, - None => panic!("no entry found for key") - } - } -} - -/// HashMap iterator -pub struct Entries<'a, K: 'a, V: 'a> { - inner: table::Entries<'a, K, V> -} - -/// HashMap mutable values iterator -pub struct 
MutEntries<'a, K: 'a, V: 'a> { - inner: table::MutEntries<'a, K, V> -} - -/// HashMap move iterator -pub struct MoveEntries { - inner: iter::Map<'static, (SafeHash, K, V), (K, V), table::MoveEntries> -} - -/// A view into a single occupied location in a HashMap -pub struct OccupiedEntry<'a, K:'a, V:'a> { - elem: FullBucket>, -} - -/// A view into a single empty location in a HashMap -pub struct VacantEntry<'a, K:'a, V:'a> { - hash: SafeHash, - key: K, - elem: VacantEntryState>, -} - -/// A view into a single location in a map, which may be vacant or occupied -pub enum Entry<'a, K:'a, V:'a> { - /// An occupied Entry - Occupied(OccupiedEntry<'a, K, V>), - /// A vacant Entry - Vacant(VacantEntry<'a, K, V>), -} - -/// Possible states of a VacantEntry -enum VacantEntryState { - /// The index is occupied, but the key to insert has precedence, - /// and will kick the current one out on insertion - NeqElem(FullBucket, uint), - /// The index is genuinely vacant - NoElem(EmptyBucket), -} - -impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { - #[inline] - fn next(&mut self) -> Option<(&'a K, &'a V)> { - self.inner.next() - } - #[inline] - fn size_hint(&self) -> (uint, Option) { - self.inner.size_hint() - } -} - -impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { - #[inline] - fn next(&mut self) -> Option<(&'a K, &'a mut V)> { - self.inner.next() - } - #[inline] - fn size_hint(&self) -> (uint, Option) { - self.inner.size_hint() - } -} - -impl Iterator<(K, V)> for MoveEntries { - #[inline] - fn next(&mut self) -> Option<(K, V)> { - self.inner.next() - } - #[inline] - fn size_hint(&self) -> (uint, Option) { - self.inner.size_hint() - } -} - -impl<'a, K, V> OccupiedEntry<'a, K, V> { - /// Gets a reference to the value in the entry - pub fn get(&self) -> &V { - let (_, v) = self.elem.read(); - v - } - - /// Gets a mutable reference to the value in the entry - pub fn get_mut(&mut self) -> &mut V { - let (_, v) = self.elem.read_mut(); - v - } - - /// 
Converts the OccupiedEntry into a mutable reference to the value in the entry - /// with a lifetime bound to the map itself - pub fn into_mut(self) -> &'a mut V { - let (_, v) = self.elem.into_mut_refs(); - v - } - - /// Sets the value of the entry, and returns the entry's old value - pub fn set(&mut self, mut value: V) -> V { - let old_value = self.get_mut(); - mem::swap(&mut value, old_value); - value - } - - /// Takes the value out of the entry, and returns it - pub fn take(self) -> V { - let (_, _, v) = self.elem.take(); - v - } -} - -impl<'a, K, V> VacantEntry<'a, K, V> { - /// Sets the value of the entry with the VacantEntry's key, - /// and returns a mutable reference to it - pub fn set(self, value: V) -> &'a mut V { - match self.elem { - NeqElem(bucket, ib) => { - robin_hood(bucket, ib, self.hash, self.key, value) - } - NoElem(bucket) => { - let full = bucket.put(self.hash, self.key, value); - let (_, v) = full.into_mut_refs(); - v - } - } - } -} - -/// HashMap keys iterator -pub type Keys<'a, K, V> = - iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>; - -/// HashMap values iterator -pub type Values<'a, K, V> = - iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>; - -impl, V, S, H: Hasher + Default> FromIterator<(K, V)> for HashMap { - fn from_iter>(iter: T) -> HashMap { - let (lower, _) = iter.size_hint(); - let mut map = HashMap::with_capacity_and_hasher(lower, Default::default()); - map.extend(iter); - map - } -} - -impl, V, S, H: Hasher + Default> Extendable<(K, V)> for HashMap { - fn extend>(&mut self, mut iter: T) { - for (k, v) in iter { - self.insert(k, v); - } - } -} - -#[cfg(test)] -mod test_map { - use prelude::*; - - use super::HashMap; - use super::{Occupied, Vacant}; - use cmp::Equiv; - use hash; - use iter::{Iterator,range_inclusive,range_step_inclusive}; - use cell::RefCell; - - struct KindaIntLike(int); - - impl Equiv for KindaIntLike { - fn equiv(&self, other: &int) -> bool { - let KindaIntLike(this) = *self; - this 
== *other - } - } - impl hash::Hash for KindaIntLike { - fn hash(&self, state: &mut S) { - let KindaIntLike(this) = *self; - this.hash(state) - } - } - - #[test] - fn test_create_capacity_zero() { - let mut m = HashMap::with_capacity(0); - - assert!(m.insert(1i, 1i)); - - assert!(m.contains_key(&1)); - assert!(!m.contains_key(&0)); - } - - #[test] - fn test_insert() { - let mut m = HashMap::new(); - assert_eq!(m.len(), 0); - assert!(m.insert(1i, 2i)); - assert_eq!(m.len(), 1); - assert!(m.insert(2i, 4i)); - assert_eq!(m.len(), 2); - assert_eq!(*m.find(&1).unwrap(), 2); - assert_eq!(*m.find(&2).unwrap(), 4); - } - - local_data_key!(drop_vector: RefCell>) - - #[deriving(Hash, PartialEq, Eq)] - struct Dropable { - k: uint - } - - impl Dropable { - fn new(k: uint) -> Dropable { - let v = drop_vector.get().unwrap(); - v.borrow_mut().as_mut_slice()[k] += 1; - - Dropable { k: k } - } - } - - impl Drop for Dropable { - fn drop(&mut self) { - let v = drop_vector.get().unwrap(); - v.borrow_mut().as_mut_slice()[self.k] -= 1; - } - } - - impl Clone for Dropable { - fn clone(&self) -> Dropable { - Dropable::new(self.k) - } - } - - #[test] - fn test_drops() { - drop_vector.replace(Some(RefCell::new(Vec::from_elem(200, 0i)))); - - { - let mut m = HashMap::new(); - - let v = drop_vector.get().unwrap(); - for i in range(0u, 200) { - assert_eq!(v.borrow().as_slice()[i], 0); - } - drop(v); - - for i in range(0u, 100) { - let d1 = Dropable::new(i); - let d2 = Dropable::new(i+100); - m.insert(d1, d2); - } - - let v = drop_vector.get().unwrap(); - for i in range(0u, 200) { - assert_eq!(v.borrow().as_slice()[i], 1); - } - drop(v); - - for i in range(0u, 50) { - let k = Dropable::new(i); - let v = m.pop(&k); - - assert!(v.is_some()); - - let v = drop_vector.get().unwrap(); - assert_eq!(v.borrow().as_slice()[i], 1); - assert_eq!(v.borrow().as_slice()[i+100], 1); - } - - let v = drop_vector.get().unwrap(); - for i in range(0u, 50) { - assert_eq!(v.borrow().as_slice()[i], 0); - 
assert_eq!(v.borrow().as_slice()[i+100], 0); - } - - for i in range(50u, 100) { - assert_eq!(v.borrow().as_slice()[i], 1); - assert_eq!(v.borrow().as_slice()[i+100], 1); - } - } - - let v = drop_vector.get().unwrap(); - for i in range(0u, 200) { - assert_eq!(v.borrow().as_slice()[i], 0); - } - } - - #[test] - fn test_move_iter_drops() { - drop_vector.replace(Some(RefCell::new(Vec::from_elem(200, 0i)))); - - let hm = { - let mut hm = HashMap::new(); - - let v = drop_vector.get().unwrap(); - for i in range(0u, 200) { - assert_eq!(v.borrow().as_slice()[i], 0); - } - drop(v); - - for i in range(0u, 100) { - let d1 = Dropable::new(i); - let d2 = Dropable::new(i+100); - hm.insert(d1, d2); - } - - let v = drop_vector.get().unwrap(); - for i in range(0u, 200) { - assert_eq!(v.borrow().as_slice()[i], 1); - } - drop(v); - - hm - }; - - // By the way, ensure that cloning doesn't screw up the dropping. - drop(hm.clone()); - - { - let mut half = hm.into_iter().take(50); - - let v = drop_vector.get().unwrap(); - for i in range(0u, 200) { - assert_eq!(v.borrow().as_slice()[i], 1); - } - drop(v); - - for _ in half {} - - let v = drop_vector.get().unwrap(); - let nk = range(0u, 100).filter(|&i| { - v.borrow().as_slice()[i] == 1 - }).count(); - - let nv = range(0u, 100).filter(|&i| { - v.borrow().as_slice()[i+100] == 1 - }).count(); - - assert_eq!(nk, 50); - assert_eq!(nv, 50); - }; - - let v = drop_vector.get().unwrap(); - for i in range(0u, 200) { - assert_eq!(v.borrow().as_slice()[i], 0); - } - } - - #[test] - fn test_empty_pop() { - let mut m: HashMap = HashMap::new(); - assert_eq!(m.pop(&0), None); - } - - #[test] - fn test_lots_of_insertions() { - let mut m = HashMap::new(); - - // Try this a few times to make sure we never screw up the hashmap's - // internal state. 
- for _ in range(0i, 10) { - assert!(m.is_empty()); - - for i in range_inclusive(1i, 1000) { - assert!(m.insert(i, i)); - - for j in range_inclusive(1, i) { - let r = m.find(&j); - assert_eq!(r, Some(&j)); - } - - for j in range_inclusive(i+1, 1000) { - let r = m.find(&j); - assert_eq!(r, None); - } - } - - for i in range_inclusive(1001i, 2000) { - assert!(!m.contains_key(&i)); - } - - // remove forwards - for i in range_inclusive(1i, 1000) { - assert!(m.remove(&i)); - - for j in range_inclusive(1, i) { - assert!(!m.contains_key(&j)); - } - - for j in range_inclusive(i+1, 1000) { - assert!(m.contains_key(&j)); - } - } - - for i in range_inclusive(1i, 1000) { - assert!(!m.contains_key(&i)); - } - - for i in range_inclusive(1i, 1000) { - assert!(m.insert(i, i)); - } - - // remove backwards - for i in range_step_inclusive(1000i, 1, -1) { - assert!(m.remove(&i)); - - for j in range_inclusive(i, 1000) { - assert!(!m.contains_key(&j)); - } - - for j in range_inclusive(1, i-1) { - assert!(m.contains_key(&j)); - } - } - } - } - - #[test] - fn test_find_mut() { - let mut m = HashMap::new(); - assert!(m.insert(1i, 12i)); - assert!(m.insert(2i, 8i)); - assert!(m.insert(5i, 14i)); - let new = 100; - match m.find_mut(&5) { - None => panic!(), Some(x) => *x = new - } - assert_eq!(m.find(&5), Some(&new)); - } - - #[test] - fn test_insert_overwrite() { - let mut m = HashMap::new(); - assert!(m.insert(1i, 2i)); - assert_eq!(*m.find(&1).unwrap(), 2); - assert!(!m.insert(1i, 3i)); - assert_eq!(*m.find(&1).unwrap(), 3); - } - - #[test] - fn test_insert_conflicts() { - let mut m = HashMap::with_capacity(4); - assert!(m.insert(1i, 2i)); - assert!(m.insert(5i, 3i)); - assert!(m.insert(9i, 4i)); - assert_eq!(*m.find(&9).unwrap(), 4); - assert_eq!(*m.find(&5).unwrap(), 3); - assert_eq!(*m.find(&1).unwrap(), 2); - } - - #[test] - fn test_conflict_remove() { - let mut m = HashMap::with_capacity(4); - assert!(m.insert(1i, 2i)); - assert_eq!(*m.find(&1).unwrap(), 2); - assert!(m.insert(5, 3)); 
- assert_eq!(*m.find(&1).unwrap(), 2); - assert_eq!(*m.find(&5).unwrap(), 3); - assert!(m.insert(9, 4)); - assert_eq!(*m.find(&1).unwrap(), 2); - assert_eq!(*m.find(&5).unwrap(), 3); - assert_eq!(*m.find(&9).unwrap(), 4); - assert!(m.remove(&1)); - assert_eq!(*m.find(&9).unwrap(), 4); - assert_eq!(*m.find(&5).unwrap(), 3); - } - - #[test] - fn test_is_empty() { - let mut m = HashMap::with_capacity(4); - assert!(m.insert(1i, 2i)); - assert!(!m.is_empty()); - assert!(m.remove(&1)); - assert!(m.is_empty()); - } - - #[test] - fn test_pop() { - let mut m = HashMap::new(); - m.insert(1i, 2i); - assert_eq!(m.pop(&1), Some(2)); - assert_eq!(m.pop(&1), None); - } - - #[test] - #[allow(experimental)] - fn test_pop_equiv() { - let mut m = HashMap::new(); - m.insert(1i, 2i); - assert_eq!(m.pop_equiv(&KindaIntLike(1)), Some(2)); - assert_eq!(m.pop_equiv(&KindaIntLike(1)), None); - } - - #[test] - fn test_swap() { - let mut m = HashMap::new(); - assert_eq!(m.swap(1i, 2i), None); - assert_eq!(m.swap(1i, 3i), Some(2)); - assert_eq!(m.swap(1i, 4i), Some(3)); - } - - #[test] - fn test_iterate() { - let mut m = HashMap::with_capacity(4); - for i in range(0u, 32) { - assert!(m.insert(i, i*2)); - } - assert_eq!(m.len(), 32); - - let mut observed: u32 = 0; - - for (k, v) in m.iter() { - assert_eq!(*v, *k * 2); - observed |= 1 << *k; - } - assert_eq!(observed, 0xFFFF_FFFF); - } - - #[test] - fn test_keys() { - let vec = vec![(1i, 'a'), (2i, 'b'), (3i, 'c')]; - let map = vec.into_iter().collect::>(); - let keys = map.keys().map(|&k| k).collect::>(); - assert_eq!(keys.len(), 3); - assert!(keys.contains(&1)); - assert!(keys.contains(&2)); - assert!(keys.contains(&3)); - } - - #[test] - fn test_values() { - let vec = vec![(1i, 'a'), (2i, 'b'), (3i, 'c')]; - let map = vec.into_iter().collect::>(); - let values = map.values().map(|&v| v).collect::>(); - assert_eq!(values.len(), 3); - assert!(values.contains(&'a')); - assert!(values.contains(&'b')); - assert!(values.contains(&'c')); - } - - 
#[test] - fn test_find() { - let mut m = HashMap::new(); - assert!(m.find(&1i).is_none()); - m.insert(1i, 2i); - match m.find(&1) { - None => panic!(), - Some(v) => assert_eq!(*v, 2) - } - } - - #[test] - fn test_find_copy() { - let mut m = HashMap::new(); - assert!(m.find(&1i).is_none()); - - for i in range(1i, 10000) { - m.insert(i, i + 7); - match m.find_copy(&i) { - None => panic!(), - Some(v) => assert_eq!(v, i + 7) - } - for j in range(1i, i/100) { - match m.find_copy(&j) { - None => panic!(), - Some(v) => assert_eq!(v, j + 7) - } - } - } - } - - #[test] - fn test_eq() { - let mut m1 = HashMap::new(); - m1.insert(1i, 2i); - m1.insert(2i, 3i); - m1.insert(3i, 4i); - - let mut m2 = HashMap::new(); - m2.insert(1i, 2i); - m2.insert(2i, 3i); - - assert!(m1 != m2); - - m2.insert(3i, 4i); - - assert_eq!(m1, m2); - } - - #[test] - fn test_show() { - let mut map: HashMap = HashMap::new(); - let empty: HashMap = HashMap::new(); - - map.insert(1i, 2i); - map.insert(3i, 4i); - - let map_str = format!("{}", map); - - assert!(map_str == "{1: 2, 3: 4}".to_string() || map_str == "{3: 4, 1: 2}".to_string()); - assert_eq!(format!("{}", empty), "{}".to_string()); - } - - #[test] - fn test_expand() { - let mut m = HashMap::new(); - - assert_eq!(m.len(), 0); - assert!(m.is_empty()); - - let mut i = 0u; - let old_cap = m.table.capacity(); - while old_cap == m.table.capacity() { - m.insert(i, i); - i += 1; - } - - assert_eq!(m.len(), i); - assert!(!m.is_empty()); - } - - #[test] - fn test_resize_policy() { - let mut m = HashMap::new(); - - assert_eq!(m.len(), 0); - assert_eq!(m.table.capacity(), 0); - assert!(m.is_empty()); - - m.insert(0, 0); - m.remove(&0); - assert!(m.is_empty()); - let initial_cap = m.table.capacity(); - m.reserve(initial_cap * 2); - let cap = m.table.capacity(); - - assert_eq!(cap, initial_cap * 2); - - let mut i = 0u; - for _ in range(0, cap * 3 / 4) { - m.insert(i, i); - i += 1; - } - // three quarters full - - assert_eq!(m.len(), i); - 
assert_eq!(m.table.capacity(), cap); - - for _ in range(0, cap / 4) { - m.insert(i, i); - i += 1; - } - // half full - - let new_cap = m.table.capacity(); - assert_eq!(new_cap, cap * 2); - - for _ in range(0, cap / 2 - 1) { - i -= 1; - m.remove(&i); - assert_eq!(m.table.capacity(), new_cap); - } - // A little more than one quarter full. - // Shrinking starts as we remove more elements: - for _ in range(0, cap / 2 - 1) { - i -= 1; - m.remove(&i); - } - - assert_eq!(m.len(), i); - assert!(!m.is_empty()); - assert_eq!(m.table.capacity(), cap); - } - - #[test] - fn test_find_equiv() { - let mut m = HashMap::new(); - - let (foo, bar, baz) = (1i,2i,3i); - m.insert("foo".to_string(), foo); - m.insert("bar".to_string(), bar); - m.insert("baz".to_string(), baz); - - - assert_eq!(m.find_equiv("foo"), Some(&foo)); - assert_eq!(m.find_equiv("bar"), Some(&bar)); - assert_eq!(m.find_equiv("baz"), Some(&baz)); - - assert_eq!(m.find_equiv("qux"), None); - } - - #[test] - fn test_from_iter() { - let xs = [(1i, 1i), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; - - let map: HashMap = xs.iter().map(|&x| x).collect(); - - for &(k, v) in xs.iter() { - assert_eq!(map.find(&k), Some(&v)); - } - } - - #[test] - fn test_size_hint() { - let xs = [(1i, 1i), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; - - let map: HashMap = xs.iter().map(|&x| x).collect(); - - let mut iter = map.iter(); - - for _ in iter.by_ref().take(3) {} - - assert_eq!(iter.size_hint(), (3, Some(3))); - } - - #[test] - fn test_mut_size_hint() { - let xs = [(1i, 1i), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; - - let mut map: HashMap = xs.iter().map(|&x| x).collect(); - - let mut iter = map.iter_mut(); - - for _ in iter.by_ref().take(3) {} - - assert_eq!(iter.size_hint(), (3, Some(3))); - } - - #[test] - fn test_index() { - let mut map: HashMap = HashMap::new(); - - map.insert(1, 2); - map.insert(2, 1); - map.insert(3, 4); - - assert_eq!(map[2], 1); - } - - #[test] - #[should_fail] - fn test_index_nonexistent() { - let mut map: 
HashMap = HashMap::new(); - - map.insert(1, 2); - map.insert(2, 1); - map.insert(3, 4); - - map[4]; - } - - #[test] - fn test_entry(){ - let xs = [(1i, 10i), (2, 20), (3, 30), (4, 40), (5, 50), (6, 60)]; - - let mut map: HashMap = xs.iter().map(|&x| x).collect(); - - // Existing key (insert) - match map.entry(1) { - Vacant(_) => unreachable!(), - Occupied(mut view) => { - assert_eq!(view.get(), &10); - assert_eq!(view.set(100), 10); - } - } - assert_eq!(map.find(&1).unwrap(), &100); - assert_eq!(map.len(), 6); - - - // Existing key (update) - match map.entry(2) { - Vacant(_) => unreachable!(), - Occupied(mut view) => { - let v = view.get_mut(); - let new_v = (*v) * 10; - *v = new_v; - } - } - assert_eq!(map.find(&2).unwrap(), &200); - assert_eq!(map.len(), 6); - - // Existing key (take) - match map.entry(3) { - Vacant(_) => unreachable!(), - Occupied(view) => { - assert_eq!(view.take(), 30); - } - } - assert_eq!(map.find(&3), None); - assert_eq!(map.len(), 5); - - - // Inexistent key (insert) - match map.entry(10) { - Occupied(_) => unreachable!(), - Vacant(view) => { - assert_eq!(*view.set(1000), 1000); - } - } - assert_eq!(map.find(&10).unwrap(), &1000); - assert_eq!(map.len(), 6); - } -} diff --git a/src/libstd/collections/hashmap/mod.rs b/src/libstd/collections/hashmap/mod.rs deleted file mode 100644 index 6508d4609f1..00000000000 --- a/src/libstd/collections/hashmap/mod.rs +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! 
Unordered containers, implemented as hash-tables - -pub use self::map::HashMap; -pub use self::map::Entries; -pub use self::map::MutEntries; -pub use self::map::MoveEntries; -pub use self::map::Entry; -pub use self::map::Occupied; -pub use self::map::Vacant; -pub use self::map::OccupiedEntry; -pub use self::map::VacantEntry; -pub use self::map::Keys; -pub use self::map::Values; -pub use self::map::INITIAL_CAPACITY; -pub use self::set::HashSet; -pub use self::set::SetItems; -pub use self::set::SetMoveItems; -pub use self::set::SetAlgebraItems; - -mod bench; -mod map; -mod set; -mod table; diff --git a/src/libstd/collections/hashmap/set.rs b/src/libstd/collections/hashmap/set.rs deleted file mode 100644 index 69f3812425f..00000000000 --- a/src/libstd/collections/hashmap/set.rs +++ /dev/null @@ -1,834 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. -// -// ignore-lexer-test FIXME #15883 - -use clone::Clone; -use cmp::{Eq, Equiv, PartialEq}; -use core::kinds::Sized; -use default::Default; -use fmt::Show; -use fmt; -use hash::{Hash, Hasher, RandomSipHasher}; -use iter::{Iterator, FromIterator, FilterMap, Chain, Repeat, Zip, Extendable}; -use iter; -use option::{Some, None}; -use result::{Ok, Err}; - -use super::{HashMap, Entries, MoveEntries, INITIAL_CAPACITY}; - - -// Future Optimization (FIXME!) -// ============================= -// -// Iteration over zero sized values is a noop. There is no need -// for `bucket.val` in the case of HashSet. I suppose we would need HKT -// to get rid of it properly. - -/// An implementation of a hash set using the underlying representation of a -/// HashMap where the value is (). 
As with the `HashMap` type, a `HashSet` -/// requires that the elements implement the `Eq` and `Hash` traits. -/// -/// # Example -/// -/// ``` -/// use std::collections::HashSet; -/// // Type inference lets us omit an explicit type signature (which -/// // would be `HashSet<&str>` in this example). -/// let mut books = HashSet::new(); -/// -/// // Add some books. -/// books.insert("A Dance With Dragons"); -/// books.insert("To Kill a Mockingbird"); -/// books.insert("The Odyssey"); -/// books.insert("The Great Gatsby"); -/// -/// // Check for a specific one. -/// if !books.contains(&("The Winds of Winter")) { -/// println!("We have {} books, but The Winds of Winter ain't one.", -/// books.len()); -/// } -/// -/// // Remove a book. -/// books.remove(&"The Odyssey"); -/// -/// // Iterate over everything. -/// for book in books.iter() { -/// println!("{}", *book); -/// } -/// ``` -/// -/// The easiest way to use `HashSet` with a custom type is to derive -/// `Eq` and `Hash`. We must also derive `PartialEq`, this will in the -/// future be implied by `Eq`. -/// -/// ``` -/// use std::collections::HashSet; -/// #[deriving(Hash, Eq, PartialEq, Show)] -/// struct Viking<'a> { -/// name: &'a str, -/// power: uint, -/// } -/// -/// let mut vikings = HashSet::new(); -/// -/// vikings.insert(Viking { name: "Einar", power: 9u }); -/// vikings.insert(Viking { name: "Einar", power: 9u }); -/// vikings.insert(Viking { name: "Olaf", power: 4u }); -/// vikings.insert(Viking { name: "Harald", power: 8u }); -/// -/// // Use derived implementation to print the vikings. -/// for x in vikings.iter() { -/// println!("{}", x); -/// } -/// ``` -#[deriving(Clone)] -pub struct HashSet { - map: HashMap -} - -impl HashSet { - /// Create an empty HashSet. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set: HashSet = HashSet::new(); - /// ``` - #[inline] - pub fn new() -> HashSet { - HashSet::with_capacity(INITIAL_CAPACITY) - } - - /// Create an empty HashSet with space for at least `n` elements in - /// the hash table. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set: HashSet = HashSet::with_capacity(10); - /// ``` - #[inline] - pub fn with_capacity(capacity: uint) -> HashSet { - HashSet { map: HashMap::with_capacity(capacity) } - } -} - -impl, S, H: Hasher> HashSet { - /// Creates a new empty hash set which will use the given hasher to hash - /// keys. - /// - /// The hash set is also created with the default initial capacity. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// use std::hash::sip::SipHasher; - /// - /// let h = SipHasher::new(); - /// let mut set = HashSet::with_hasher(h); - /// set.insert(2u); - /// ``` - #[inline] - pub fn with_hasher(hasher: H) -> HashSet { - HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher) - } - - /// Create an empty HashSet with space for at least `capacity` - /// elements in the hash table, using `hasher` to hash the keys. - /// - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow `HashSet`s to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// use std::hash::sip::SipHasher; - /// - /// let h = SipHasher::new(); - /// let mut set = HashSet::with_capacity_and_hasher(10u, h); - /// set.insert(1i); - /// ``` - #[inline] - pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet { - HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) } - } - - /// Reserve space for at least `n` elements in the hash table. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set: HashSet = HashSet::new(); - /// set.reserve(10); - /// ``` - pub fn reserve(&mut self, n: uint) { - self.map.reserve(n) - } - - /// Returns true if the hash set contains a value equivalent to the - /// given query value. - /// - /// # Example - /// - /// This is a slightly silly example where we define the number's - /// parity as the equivalance class. It is important that the - /// values hash the same, which is why we implement `Hash`. - /// - /// ``` - /// use std::collections::HashSet; - /// use std::hash::Hash; - /// use std::hash::sip::SipState; - /// - /// #[deriving(Eq, PartialEq)] - /// struct EvenOrOdd { - /// num: uint - /// }; - /// - /// impl Hash for EvenOrOdd { - /// fn hash(&self, state: &mut SipState) { - /// let parity = self.num % 2; - /// parity.hash(state); - /// } - /// } - /// - /// impl Equiv for EvenOrOdd { - /// fn equiv(&self, other: &EvenOrOdd) -> bool { - /// self.num % 2 == other.num % 2 - /// } - /// } - /// - /// let mut set = HashSet::new(); - /// set.insert(EvenOrOdd { num: 3u }); - /// - /// assert!(set.contains_equiv(&EvenOrOdd { num: 3u })); - /// assert!(set.contains_equiv(&EvenOrOdd { num: 5u })); - /// assert!(!set.contains_equiv(&EvenOrOdd { num: 4u })); - /// assert!(!set.contains_equiv(&EvenOrOdd { num: 2u })); - /// - /// ``` - pub fn contains_equiv + Equiv>(&self, value: &Q) -> bool { - self.map.contains_key_equiv(value) - } - - /// An iterator visiting all elements in arbitrary order. - /// Iterator element type is &'a T. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set = HashSet::new(); - /// set.insert("a"); - /// set.insert("b"); - /// - /// // Will print in an arbitrary order. - /// for x in set.iter() { - /// println!("{}", x); - /// } - /// ``` - pub fn iter<'a>(&'a self) -> SetItems<'a, T> { - self.map.keys() - } - - /// Deprecated: use `into_iter`. 
- #[deprecated = "use into_iter"] - pub fn move_iter(self) -> SetMoveItems { - self.into_iter() - } - - /// Creates a consuming iterator, that is, one that moves each value out - /// of the set in arbitrary order. The set cannot be used after calling - /// this. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let mut set = HashSet::new(); - /// set.insert("a".to_string()); - /// set.insert("b".to_string()); - /// - /// // Not possible to collect to a Vec with a regular `.iter()`. - /// let v: Vec = set.into_iter().collect(); - /// - /// // Will print in an arbitrary order. - /// for x in v.iter() { - /// println!("{}", x); - /// } - /// ``` - pub fn into_iter(self) -> SetMoveItems { - self.map.into_iter().map(|(k, _)| k) - } - - /// Visit the values representing the difference. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let a: HashSet = [1i, 2, 3].iter().map(|&x| x).collect(); - /// let b: HashSet = [4i, 2, 3, 4].iter().map(|&x| x).collect(); - /// - /// // Can be seen as `a - b`. - /// for x in a.difference(&b) { - /// println!("{}", x); // Print 1 - /// } - /// - /// let diff: HashSet = a.difference(&b).map(|&x| x).collect(); - /// assert_eq!(diff, [1i].iter().map(|&x| x).collect()); - /// - /// // Note that difference is not symmetric, - /// // and `b - a` means something else: - /// let diff: HashSet = b.difference(&a).map(|&x| x).collect(); - /// assert_eq!(diff, [4i].iter().map(|&x| x).collect()); - /// ``` - pub fn difference<'a>(&'a self, other: &'a HashSet) -> SetAlgebraItems<'a, T, H> { - Repeat::new(other).zip(self.iter()) - .filter_map(|(other, elt)| { - if !other.contains(elt) { Some(elt) } else { None } - }) - } - - /// Visit the values representing the symmetric difference. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let a: HashSet = [1i, 2, 3].iter().map(|&x| x).collect(); - /// let b: HashSet = [4i, 2, 3, 4].iter().map(|&x| x).collect(); - /// - /// // Print 1, 4 in arbitrary order. - /// for x in a.symmetric_difference(&b) { - /// println!("{}", x); - /// } - /// - /// let diff1: HashSet = a.symmetric_difference(&b).map(|&x| x).collect(); - /// let diff2: HashSet = b.symmetric_difference(&a).map(|&x| x).collect(); - /// - /// assert_eq!(diff1, diff2); - /// assert_eq!(diff1, [1i, 4].iter().map(|&x| x).collect()); - /// ``` - pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet) - -> Chain, SetAlgebraItems<'a, T, H>> { - self.difference(other).chain(other.difference(self)) - } - - /// Visit the values representing the intersection. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let a: HashSet = [1i, 2, 3].iter().map(|&x| x).collect(); - /// let b: HashSet = [4i, 2, 3, 4].iter().map(|&x| x).collect(); - /// - /// // Print 2, 3 in arbitrary order. - /// for x in a.intersection(&b) { - /// println!("{}", x); - /// } - /// - /// let diff: HashSet = a.intersection(&b).map(|&x| x).collect(); - /// assert_eq!(diff, [2i, 3].iter().map(|&x| x).collect()); - /// ``` - pub fn intersection<'a>(&'a self, other: &'a HashSet) - -> SetAlgebraItems<'a, T, H> { - Repeat::new(other).zip(self.iter()) - .filter_map(|(other, elt)| { - if other.contains(elt) { Some(elt) } else { None } - }) - } - - /// Visit the values representing the union. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// let a: HashSet = [1i, 2, 3].iter().map(|&x| x).collect(); - /// let b: HashSet = [4i, 2, 3, 4].iter().map(|&x| x).collect(); - /// - /// // Print 1, 2, 3, 4 in arbitrary order. 
- /// for x in a.union(&b) { - /// println!("{}", x); - /// } - /// - /// let diff: HashSet = a.union(&b).map(|&x| x).collect(); - /// assert_eq!(diff, [1i, 2, 3, 4].iter().map(|&x| x).collect()); - /// ``` - pub fn union<'a>(&'a self, other: &'a HashSet) - -> Chain, SetAlgebraItems<'a, T, H>> { - self.iter().chain(other.difference(self)) - } - - /// Return the number of elements in the set - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let mut v = HashSet::new(); - /// assert_eq!(v.len(), 0); - /// v.insert(1u); - /// assert_eq!(v.len(), 1); - /// ``` - pub fn len(&self) -> uint { self.map.len() } - - /// Returns true if the set contains no elements - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let mut v = HashSet::new(); - /// assert!(v.is_empty()); - /// v.insert(1u); - /// assert!(!v.is_empty()); - /// ``` - pub fn is_empty(&self) -> bool { self.map.len() == 0 } - - /// Clears the set, removing all values. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let mut v = HashSet::new(); - /// v.insert(1u); - /// v.clear(); - /// assert!(v.is_empty()); - /// ``` - pub fn clear(&mut self) { self.map.clear() } - - /// Returns `true` if the set contains a value. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let set: HashSet = [1, 2, 3].iter().map(|&x| x).collect(); - /// assert_eq!(set.contains(&1), true); - /// assert_eq!(set.contains(&4), false); - /// ``` - pub fn contains(&self, value: &T) -> bool { self.map.contains_key(value) } - - /// Returns `true` if the set has no elements in common with `other`. - /// This is equivalent to checking for an empty intersection. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let a: HashSet = [1, 2, 3].iter().map(|&x| x).collect(); - /// let mut b: HashSet = HashSet::new(); - /// - /// assert_eq!(a.is_disjoint(&b), true); - /// b.insert(4); - /// assert_eq!(a.is_disjoint(&b), true); - /// b.insert(1); - /// assert_eq!(a.is_disjoint(&b), false); - /// ``` - pub fn is_disjoint(&self, other: &HashSet) -> bool { - self.iter().all(|v| !other.contains(v)) - } - - /// Returns `true` if the set is a subset of another. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let sup: HashSet = [1, 2, 3].iter().map(|&x| x).collect(); - /// let mut set: HashSet = HashSet::new(); - /// - /// assert_eq!(set.is_subset(&sup), true); - /// set.insert(2); - /// assert_eq!(set.is_subset(&sup), true); - /// set.insert(4); - /// assert_eq!(set.is_subset(&sup), false); - /// ``` - pub fn is_subset(&self, other: &HashSet) -> bool { - self.iter().all(|v| other.contains(v)) - } - - /// Returns `true` if the set is a superset of another. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let sub: HashSet = [1, 2].iter().map(|&x| x).collect(); - /// let mut set: HashSet = HashSet::new(); - /// - /// assert_eq!(set.is_superset(&sub), false); - /// - /// set.insert(0); - /// set.insert(1); - /// assert_eq!(set.is_superset(&sub), false); - /// - /// set.insert(2); - /// assert_eq!(set.is_superset(&sub), true); - /// ``` - #[inline] - pub fn is_superset(&self, other: &HashSet) -> bool { - other.is_subset(self) - } - - /// Adds a value to the set. Returns `true` if the value was not already - /// present in the set. 
- /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let mut set = HashSet::new(); - /// - /// assert_eq!(set.insert(2u), true); - /// assert_eq!(set.insert(2), false); - /// assert_eq!(set.len(), 1); - /// ``` - pub fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) } - - /// Removes a value from the set. Returns `true` if the value was - /// present in the set. - /// - /// # Example - /// - /// ``` - /// use std::collections::HashSet; - /// - /// let mut set = HashSet::new(); - /// - /// set.insert(2u); - /// assert_eq!(set.remove(&2), true); - /// assert_eq!(set.remove(&2), false); - /// ``` - pub fn remove(&mut self, value: &T) -> bool { self.map.remove(value) } -} - -impl, S, H: Hasher> PartialEq for HashSet { - fn eq(&self, other: &HashSet) -> bool { - if self.len() != other.len() { return false; } - - self.iter().all(|key| other.contains(key)) - } -} - -impl, S, H: Hasher> Eq for HashSet {} - -impl + fmt::Show, S, H: Hasher> fmt::Show for HashSet { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(write!(f, "{{")); - - for (i, x) in self.iter().enumerate() { - if i != 0 { try!(write!(f, ", ")); } - try!(write!(f, "{}", *x)); - } - - write!(f, "}}") - } -} - -impl, S, H: Hasher + Default> FromIterator for HashSet { - fn from_iter>(iter: I) -> HashSet { - let (lower, _) = iter.size_hint(); - let mut set = HashSet::with_capacity_and_hasher(lower, Default::default()); - set.extend(iter); - set - } -} - -impl, S, H: Hasher + Default> Extendable for HashSet { - fn extend>(&mut self, mut iter: I) { - for k in iter { - self.insert(k); - } - } -} - -impl, S, H: Hasher + Default> Default for HashSet { - fn default() -> HashSet { - HashSet::with_hasher(Default::default()) - } -} - -/// HashSet iterator -pub type SetItems<'a, K> = - iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>; - -/// HashSet move iterator -pub type SetMoveItems = - iter::Map<'static, (K, ()), K, MoveEntries>; - -// 
`Repeat` is used to feed the filter closure an explicit capture -// of a reference to the other set -/// Set operations iterator -pub type SetAlgebraItems<'a, T, H> = - FilterMap<'static, (&'a HashSet, &'a T), &'a T, - Zip>, SetItems<'a, T>>>; - -#[cfg(test)] -mod test_set { - use prelude::*; - - use super::HashSet; - use slice::ImmutablePartialEqSlice; - - #[test] - fn test_disjoint() { - let mut xs = HashSet::new(); - let mut ys = HashSet::new(); - assert!(xs.is_disjoint(&ys)); - assert!(ys.is_disjoint(&xs)); - assert!(xs.insert(5i)); - assert!(ys.insert(11i)); - assert!(xs.is_disjoint(&ys)); - assert!(ys.is_disjoint(&xs)); - assert!(xs.insert(7)); - assert!(xs.insert(19)); - assert!(xs.insert(4)); - assert!(ys.insert(2)); - assert!(ys.insert(-11)); - assert!(xs.is_disjoint(&ys)); - assert!(ys.is_disjoint(&xs)); - assert!(ys.insert(7)); - assert!(!xs.is_disjoint(&ys)); - assert!(!ys.is_disjoint(&xs)); - } - - #[test] - fn test_subset_and_superset() { - let mut a = HashSet::new(); - assert!(a.insert(0i)); - assert!(a.insert(5)); - assert!(a.insert(11)); - assert!(a.insert(7)); - - let mut b = HashSet::new(); - assert!(b.insert(0i)); - assert!(b.insert(7)); - assert!(b.insert(19)); - assert!(b.insert(250)); - assert!(b.insert(11)); - assert!(b.insert(200)); - - assert!(!a.is_subset(&b)); - assert!(!a.is_superset(&b)); - assert!(!b.is_subset(&a)); - assert!(!b.is_superset(&a)); - - assert!(b.insert(5)); - - assert!(a.is_subset(&b)); - assert!(!a.is_superset(&b)); - assert!(!b.is_subset(&a)); - assert!(b.is_superset(&a)); - } - - #[test] - fn test_iterate() { - let mut a = HashSet::new(); - for i in range(0u, 32) { - assert!(a.insert(i)); - } - let mut observed: u32 = 0; - for k in a.iter() { - observed |= 1 << *k; - } - assert_eq!(observed, 0xFFFF_FFFF); - } - - #[test] - fn test_intersection() { - let mut a = HashSet::new(); - let mut b = HashSet::new(); - - assert!(a.insert(11i)); - assert!(a.insert(1)); - assert!(a.insert(3)); - assert!(a.insert(77)); - 
assert!(a.insert(103)); - assert!(a.insert(5)); - assert!(a.insert(-5)); - - assert!(b.insert(2i)); - assert!(b.insert(11)); - assert!(b.insert(77)); - assert!(b.insert(-9)); - assert!(b.insert(-42)); - assert!(b.insert(5)); - assert!(b.insert(3)); - - let mut i = 0; - let expected = [3, 5, 11, 77]; - for x in a.intersection(&b) { - assert!(expected.contains(x)); - i += 1 - } - assert_eq!(i, expected.len()); - } - - #[test] - fn test_difference() { - let mut a = HashSet::new(); - let mut b = HashSet::new(); - - assert!(a.insert(1i)); - assert!(a.insert(3)); - assert!(a.insert(5)); - assert!(a.insert(9)); - assert!(a.insert(11)); - - assert!(b.insert(3i)); - assert!(b.insert(9)); - - let mut i = 0; - let expected = [1, 5, 11]; - for x in a.difference(&b) { - assert!(expected.contains(x)); - i += 1 - } - assert_eq!(i, expected.len()); - } - - #[test] - fn test_symmetric_difference() { - let mut a = HashSet::new(); - let mut b = HashSet::new(); - - assert!(a.insert(1i)); - assert!(a.insert(3)); - assert!(a.insert(5)); - assert!(a.insert(9)); - assert!(a.insert(11)); - - assert!(b.insert(-2i)); - assert!(b.insert(3)); - assert!(b.insert(9)); - assert!(b.insert(14)); - assert!(b.insert(22)); - - let mut i = 0; - let expected = [-2, 1, 5, 11, 14, 22]; - for x in a.symmetric_difference(&b) { - assert!(expected.contains(x)); - i += 1 - } - assert_eq!(i, expected.len()); - } - - #[test] - fn test_union() { - let mut a = HashSet::new(); - let mut b = HashSet::new(); - - assert!(a.insert(1i)); - assert!(a.insert(3)); - assert!(a.insert(5)); - assert!(a.insert(9)); - assert!(a.insert(11)); - assert!(a.insert(16)); - assert!(a.insert(19)); - assert!(a.insert(24)); - - assert!(b.insert(-2i)); - assert!(b.insert(1)); - assert!(b.insert(5)); - assert!(b.insert(9)); - assert!(b.insert(13)); - assert!(b.insert(19)); - - let mut i = 0; - let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24]; - for x in a.union(&b) { - assert!(expected.contains(x)); - i += 1 - } - assert_eq!(i, 
expected.len()); - } - - #[test] - fn test_from_iter() { - let xs = [1i, 2, 3, 4, 5, 6, 7, 8, 9]; - - let set: HashSet = xs.iter().map(|&x| x).collect(); - - for x in xs.iter() { - assert!(set.contains(x)); - } - } - - #[test] - fn test_move_iter() { - let hs = { - let mut hs = HashSet::new(); - - hs.insert('a'); - hs.insert('b'); - - hs - }; - - let v = hs.into_iter().collect::>(); - assert!(['a', 'b'] == v.as_slice() || ['b', 'a'] == v.as_slice()); - } - - #[test] - fn test_eq() { - // These constants once happened to expose a bug in insert(). - // I'm keeping them around to prevent a regression. - let mut s1 = HashSet::new(); - - s1.insert(1i); - s1.insert(2); - s1.insert(3); - - let mut s2 = HashSet::new(); - - s2.insert(1i); - s2.insert(2); - - assert!(s1 != s2); - - s2.insert(3); - - assert_eq!(s1, s2); - } - - #[test] - fn test_show() { - let mut set: HashSet = HashSet::new(); - let empty: HashSet = HashSet::new(); - - set.insert(1i); - set.insert(2); - - let set_str = format!("{}", set); - - assert!(set_str == "{1, 2}".to_string() || set_str == "{2, 1}".to_string()); - assert_eq!(format!("{}", empty), "{}".to_string()); - } -} diff --git a/src/libstd/collections/hashmap/table.rs b/src/libstd/collections/hashmap/table.rs deleted file mode 100644 index 4d73029b7b0..00000000000 --- a/src/libstd/collections/hashmap/table.rs +++ /dev/null @@ -1,907 +0,0 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
-// -// ignore-lexer-test FIXME #15883 - -use clone::Clone; -use cmp; -use hash::{Hash, Hasher}; -use iter::{Iterator, count}; -use kinds::{Sized, marker}; -use mem::{min_align_of, size_of}; -use mem; -use num::{CheckedAdd, CheckedMul, is_power_of_two}; -use ops::{Deref, DerefMut, Drop}; -use option::{Some, None, Option}; -use ptr::{RawPtr, copy_nonoverlapping_memory, zero_memory}; -use ptr; -use rt::heap::{allocate, deallocate}; - -const EMPTY_BUCKET: u64 = 0u64; - -/// The raw hashtable, providing safe-ish access to the unzipped and highly -/// optimized arrays of hashes, keys, and values. -/// -/// This design uses less memory and is a lot faster than the naive -/// `Vec>`, because we don't pay for the overhead of an -/// option on every element, and we get a generally more cache-aware design. -/// -/// Essential invariants of this structure: -/// -/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` -/// points to 'undefined' contents. Don't read from it. This invariant is -/// enforced outside this module with the `EmptyBucket`, `FullBucket`, -/// and `SafeHash` types. -/// -/// - An `EmptyBucket` is only constructed at an index with -/// a hash of EMPTY_BUCKET. -/// -/// - A `FullBucket` is only constructed at an index with a -/// non-EMPTY_BUCKET hash. -/// -/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get -/// around hashes of zero by changing them to 0x8000_0000_0000_0000, -/// which will likely map to the same bucket, while not being confused -/// with "empty". -/// -/// - All three "arrays represented by pointers" are the same length: -/// `capacity`. This is set at creation and never changes. The arrays -/// are unzipped to save space (we don't have to pay for the padding -/// between odd sized elements, such as in a map from u64 to u8), and -/// be more cache aware (scanning through 8 hashes brings in at most -/// 2 cache lines, since they're all right beside each other). 
-/// -/// You can kind of think of this module/data structure as a safe wrapper -/// around just the "table" part of the hashtable. It enforces some -/// invariants at the type level and employs some performance trickery, -/// but in general is just a tricked out `Vec>`. -#[unsafe_no_drop_flag] -pub struct RawTable { - capacity: uint, - size: uint, - hashes: *mut u64, - // Because K/V do not appear directly in any of the types in the struct, - // inform rustc that in fact instances of K and V are reachable from here. - marker: marker::CovariantType<(K,V)>, -} - -struct RawBucket { - hash: *mut u64, - key: *mut K, - val: *mut V -} - -pub struct Bucket { - raw: RawBucket, - idx: uint, - table: M -} - -pub struct EmptyBucket { - raw: RawBucket, - idx: uint, - table: M -} - -pub struct FullBucket { - raw: RawBucket, - idx: uint, - table: M -} - -pub type EmptyBucketImm<'table, K, V> = EmptyBucket>; -pub type FullBucketImm<'table, K, V> = FullBucket>; - -pub type EmptyBucketMut<'table, K, V> = EmptyBucket>; -pub type FullBucketMut<'table, K, V> = FullBucket>; - -pub enum BucketState { - Empty(EmptyBucket), - Full(FullBucket), -} - -// A GapThenFull encapsulates the state of two consecutive buckets at once. -// The first bucket, called the gap, is known to be empty. -// The second bucket is full. -struct GapThenFull { - gap: EmptyBucket, - full: FullBucket, -} - -/// A hash that is not zero, since we use a hash of zero to represent empty -/// buckets. -#[deriving(PartialEq)] -pub struct SafeHash { - hash: u64, -} - -impl SafeHash { - /// Peek at the hash value, which is guaranteed to be non-zero. - #[inline(always)] - pub fn inspect(&self) -> u64 { self.hash } -} - -/// We need to remove hashes of 0. That's reserved for empty buckets. -/// This function wraps up `hash_keyed` to be the only way outside this -/// module to generate a SafeHash. 
-pub fn make_hash, S, H: Hasher>(hasher: &H, t: &T) -> SafeHash { - match hasher.hash(t) { - // This constant is exceedingly likely to hash to the same - // bucket, but it won't be counted as empty! Just so we can maintain - // our precious uniform distribution of initial indexes. - EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 }, - h => SafeHash { hash: h }, - } -} - -// `replace` casts a `*u64` to a `*SafeHash`. Since we statically -// ensure that a `FullBucket` points to an index with a non-zero hash, -// and a `SafeHash` is just a `u64` with a different name, this is -// safe. -// -// This test ensures that a `SafeHash` really IS the same size as a -// `u64`. If you need to change the size of `SafeHash` (and -// consequently made this test fail), `replace` needs to be -// modified to no longer assume this. -#[test] -fn can_alias_safehash_as_u64() { - assert_eq!(size_of::(), size_of::()) -} - -impl RawBucket { - unsafe fn offset(self, count: int) -> RawBucket { - RawBucket { - hash: self.hash.offset(count), - key: self.key.offset(count), - val: self.val.offset(count), - } - } -} - -// For parameterizing over mutability. -impl<'t, K, V> Deref> for &'t RawTable { - fn deref(&self) -> &RawTable { - &**self - } -} - -impl<'t, K, V> Deref> for &'t mut RawTable { - fn deref(&self) -> &RawTable { - &**self - } -} - -impl<'t, K, V> DerefMut> for &'t mut RawTable { - fn deref_mut(&mut self) -> &mut RawTable { - &mut **self - } -} - -// Buckets hold references to the table. -impl FullBucket { - /// Borrow a reference to the table. - pub fn table(&self) -> &M { - &self.table - } - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table - } - /// Get the raw index. - pub fn index(&self) -> uint { - self.idx - } -} - -impl EmptyBucket { - /// Borrow a reference to the table. - pub fn table(&self) -> &M { - &self.table - } - /// Move out the reference to the table. 
- pub fn into_table(self) -> M { - self.table - } -} - -impl Bucket { - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table - } - /// Get the raw index. - pub fn index(&self) -> uint { - self.idx - } -} - -impl>> Bucket { - pub fn new(table: M, hash: &SafeHash) -> Bucket { - Bucket::at_index(table, hash.inspect() as uint) - } - - pub fn at_index(table: M, ib_index: uint) -> Bucket { - let ib_index = ib_index & (table.capacity() - 1); - Bucket { - raw: unsafe { - table.first_bucket_raw().offset(ib_index as int) - }, - idx: ib_index, - table: table - } - } - - pub fn first(table: M) -> Bucket { - Bucket { - raw: table.first_bucket_raw(), - idx: 0, - table: table - } - } - - /// Reads a bucket at a given index, returning an enum indicating whether - /// it's initialized or not. You need to match on this enum to get - /// the appropriate types to call most of the other functions in - /// this module. - pub fn peek(self) -> BucketState { - match unsafe { *self.raw.hash } { - EMPTY_BUCKET => - Empty(EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }), - _ => - Full(FullBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }) - } - } - - /// Modifies the bucket pointer in place to make it point to the next slot. - pub fn next(&mut self) { - // Branchless bucket iteration step. - // As we reach the end of the table... - // We take the current idx: 0111111b - // Xor it by its increment: ^ 1000000b - // ------------ - // 1111111b - // Then AND with the capacity: & 1000000b - // ------------ - // to get the backwards offset: 1000000b - // ... and it's zero at all other times. - let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity(); - // Finally, we obtain the offset 1 or the offset -cap + 1. 
- let dist = 1i - (maybe_wraparound_dist as int); - - self.idx += 1; - - unsafe { - self.raw = self.raw.offset(dist); - } - } -} - -impl>> EmptyBucket { - #[inline] - pub fn next(self) -> Bucket { - let mut bucket = self.into_bucket(); - bucket.next(); - bucket - } - - #[inline] - pub fn into_bucket(self) -> Bucket { - Bucket { - raw: self.raw, - idx: self.idx, - table: self.table - } - } - - pub fn gap_peek(self) -> Option> { - let gap = EmptyBucket { - raw: self.raw, - idx: self.idx, - table: () - }; - - match self.next().peek() { - Full(bucket) => { - Some(GapThenFull { - gap: gap, - full: bucket - }) - } - Empty(..) => None - } - } -} - -impl>> EmptyBucket { - /// Puts given key and value pair, along with the key's hash, - /// into this bucket in the hashtable. Note how `self` is 'moved' into - /// this function, because this slot will no longer be empty when - /// we return! A `FullBucket` is returned for later use, pointing to - /// the newly-filled slot in the hashtable. - /// - /// Use `make_hash` to construct a `SafeHash` to pass to this function. - pub fn put(mut self, hash: SafeHash, key: K, value: V) - -> FullBucket { - unsafe { - *self.raw.hash = hash.inspect(); - ptr::write(self.raw.key, key); - ptr::write(self.raw.val, value); - } - - self.table.size += 1; - - FullBucket { raw: self.raw, idx: self.idx, table: self.table } - } -} - -impl>> FullBucket { - #[inline] - pub fn next(self) -> Bucket { - let mut bucket = self.into_bucket(); - bucket.next(); - bucket - } - - #[inline] - pub fn into_bucket(self) -> Bucket { - Bucket { - raw: self.raw, - idx: self.idx, - table: self.table - } - } - - /// Get the distance between this bucket and the 'ideal' location - /// as determined by the key's hash stored in it. - /// - /// In the cited blog posts above, this is called the "distance to - /// initial bucket", or DIB. Also known as "probe count". 
- pub fn distance(&self) -> uint { - // Calculates the distance one has to travel when going from - // `hash mod capacity` onwards to `idx mod capacity`, wrapping around - // if the destination is not reached before the end of the table. - (self.idx - self.hash().inspect() as uint) & (self.table.capacity() - 1) - } - - #[inline] - pub fn hash(&self) -> SafeHash { - unsafe { - SafeHash { - hash: *self.raw.hash - } - } - } - - /// Gets references to the key and value at a given index. - pub fn read(&self) -> (&K, &V) { - unsafe { - (&*self.raw.key, - &*self.raw.val) - } - } -} - -impl>> FullBucket { - /// Removes this bucket's key and value from the hashtable. - /// - /// This works similarly to `put`, building an `EmptyBucket` out of the - /// taken bucket. - pub fn take(mut self) -> (EmptyBucket, K, V) { - let key = self.raw.key as *const K; - let val = self.raw.val as *const V; - - self.table.size -= 1; - - unsafe { - *self.raw.hash = EMPTY_BUCKET; - ( - EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }, - ptr::read(key), - ptr::read(val) - ) - } - } - - pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { - unsafe { - let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); - let old_key = ptr::replace(self.raw.key, k); - let old_val = ptr::replace(self.raw.val, v); - - (old_hash, old_key, old_val) - } - } - - /// Gets mutable references to the key and value at a given index. - pub fn read_mut(&mut self) -> (&mut K, &mut V) { - unsafe { - (&mut *self.raw.key, - &mut *self.raw.val) - } - } -} - -impl<'t, K, V, M: Deref> + 't> FullBucket { - /// Exchange a bucket state for immutable references into the table. - /// Because the underlying reference to the table is also consumed, - /// no further changes to the structure of the table are possible; - /// in exchange for this, the returned references have a longer lifetime - /// than the references returned by `read()`. 
- pub fn into_refs(self) -> (&'t K, &'t V) { - unsafe { - (&*self.raw.key, - &*self.raw.val) - } - } -} - -impl<'t, K, V, M: DerefMut> + 't> FullBucket { - /// This works similarly to `into_refs`, exchanging a bucket state - /// for mutable references into the table. - pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { - unsafe { - (&mut *self.raw.key, - &mut *self.raw.val) - } - } -} - -impl BucketState { - // For convenience. - pub fn expect_full(self) -> FullBucket { - match self { - Full(full) => full, - Empty(..) => panic!("Expected full bucket") - } - } -} - -impl>> GapThenFull { - #[inline] - pub fn full(&self) -> &FullBucket { - &self.full - } - - pub fn shift(mut self) -> Option> { - unsafe { - *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); - copy_nonoverlapping_memory(self.gap.raw.key, self.full.raw.key as *const K, 1); - copy_nonoverlapping_memory(self.gap.raw.val, self.full.raw.val as *const V, 1); - } - - let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full; - - match self.full.next().peek() { - Full(bucket) => { - self.gap.raw = prev_raw; - self.gap.idx = prev_idx; - - self.full = bucket; - - Some(self) - } - Empty(..) => None - } - } -} - - -/// Rounds up to a multiple of a power of two. Returns the closest multiple -/// of `target_alignment` that is higher or equal to `unrounded`. -/// -/// # Failure -/// -/// Fails if `target_alignment` is not a power of two. 
-fn round_up_to_next(unrounded: uint, target_alignment: uint) -> uint { - assert!(is_power_of_two(target_alignment)); - (unrounded + target_alignment - 1) & !(target_alignment - 1) -} - -#[test] -fn test_rounding() { - assert_eq!(round_up_to_next(0, 4), 0); - assert_eq!(round_up_to_next(1, 4), 4); - assert_eq!(round_up_to_next(2, 4), 4); - assert_eq!(round_up_to_next(3, 4), 4); - assert_eq!(round_up_to_next(4, 4), 4); - assert_eq!(round_up_to_next(5, 4), 8); -} - -// Returns a tuple of (key_offset, val_offset), -// from the start of a mallocated array. -fn calculate_offsets(hashes_size: uint, - keys_size: uint, keys_align: uint, - vals_align: uint) - -> (uint, uint) { - let keys_offset = round_up_to_next(hashes_size, keys_align); - let end_of_keys = keys_offset + keys_size; - - let vals_offset = round_up_to_next(end_of_keys, vals_align); - - (keys_offset, vals_offset) -} - -// Returns a tuple of (minimum required malloc alignment, hash_offset, -// array_size), from the start of a mallocated array. -fn calculate_allocation(hash_size: uint, hash_align: uint, - keys_size: uint, keys_align: uint, - vals_size: uint, vals_align: uint) - -> (uint, uint, uint) { - let hash_offset = 0; - let (_, vals_offset) = calculate_offsets(hash_size, - keys_size, keys_align, - vals_align); - let end_of_vals = vals_offset + vals_size; - - let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align)); - - (min_align, hash_offset, end_of_vals) -} - -#[test] -fn test_offset_calculation() { - assert_eq!(calculate_allocation(128, 8, 15, 1, 4, 4), (8, 0, 148)); - assert_eq!(calculate_allocation(3, 1, 2, 1, 1, 1), (1, 0, 6)); - assert_eq!(calculate_allocation(6, 2, 12, 4, 24, 8), (8, 0, 48)); - assert_eq!(calculate_offsets(128, 15, 1, 4), (128, 144)); - assert_eq!(calculate_offsets(3, 2, 1, 1), (3, 5)); - assert_eq!(calculate_offsets(6, 12, 4, 8), (8, 24)); -} - -impl RawTable { - /// Does not initialize the buckets. 
The caller should ensure they, - /// at the very least, set every hash to EMPTY_BUCKET. - unsafe fn new_uninitialized(capacity: uint) -> RawTable { - if capacity == 0 { - return RawTable { - size: 0, - capacity: 0, - hashes: 0 as *mut u64, - marker: marker::CovariantType, - }; - } - // No need for `checked_mul` before a more restrictive check performed - // later in this method. - let hashes_size = capacity * size_of::(); - let keys_size = capacity * size_of::< K >(); - let vals_size = capacity * size_of::< V >(); - - // Allocating hashmaps is a little tricky. We need to allocate three - // arrays, but since we know their sizes and alignments up front, - // we just allocate a single array, and then have the subarrays - // point into it. - // - // This is great in theory, but in practice getting the alignment - // right is a little subtle. Therefore, calculating offsets has been - // factored out into a different function. - let (malloc_alignment, hash_offset, size) = - calculate_allocation( - hashes_size, min_align_of::(), - keys_size, min_align_of::< K >(), - vals_size, min_align_of::< V >()); - - // One check for overflow that covers calculation and rounding of size. 
- let size_of_bucket = size_of::().checked_add(&size_of::()).unwrap() - .checked_add(&size_of::()).unwrap(); - assert!(size >= capacity.checked_mul(&size_of_bucket) - .expect("capacity overflow"), - "capacity overflow"); - - let buffer = allocate(size, malloc_alignment); - if buffer.is_null() { ::alloc::oom() } - - let hashes = buffer.offset(hash_offset as int) as *mut u64; - - RawTable { - capacity: capacity, - size: 0, - hashes: hashes, - marker: marker::CovariantType, - } - } - - fn first_bucket_raw(&self) -> RawBucket { - let hashes_size = self.capacity * size_of::(); - let keys_size = self.capacity * size_of::(); - - let buffer = self.hashes as *mut u8; - let (keys_offset, vals_offset) = calculate_offsets(hashes_size, - keys_size, min_align_of::(), - min_align_of::()); - - unsafe { - RawBucket { - hash: self.hashes, - key: buffer.offset(keys_offset as int) as *mut K, - val: buffer.offset(vals_offset as int) as *mut V - } - } - } - - /// Creates a new raw table from a given capacity. All buckets are - /// initially empty. - #[allow(experimental)] - pub fn new(capacity: uint) -> RawTable { - unsafe { - let ret = RawTable::new_uninitialized(capacity); - zero_memory(ret.hashes, capacity); - ret - } - } - - /// The hashtable's capacity, similar to a vector's. - pub fn capacity(&self) -> uint { - self.capacity - } - - /// The number of elements ever `put` in the hashtable, minus the number - /// of elements ever `take`n. 
- pub fn size(&self) -> uint { - self.size - } - - fn raw_buckets(&self) -> RawBuckets { - RawBuckets { - raw: self.first_bucket_raw(), - hashes_end: unsafe { - self.hashes.offset(self.capacity as int) - }, - marker: marker::ContravariantLifetime, - } - } - - pub fn iter(&self) -> Entries { - Entries { - iter: self.raw_buckets(), - elems_left: self.size(), - } - } - - pub fn iter_mut(&mut self) -> MutEntries { - MutEntries { - iter: self.raw_buckets(), - elems_left: self.size(), - } - } - - pub fn into_iter(self) -> MoveEntries { - let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); - // Replace the marker regardless of lifetime bounds on parameters. - MoveEntries { - iter: RawBuckets { - raw: raw, - hashes_end: hashes_end, - marker: marker::ContravariantLifetime, - }, - table: self, - } - } - - /// Returns an iterator that copies out each entry. Used while the table - /// is being dropped. - unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets { - let raw_bucket = self.first_bucket_raw(); - RevMoveBuckets { - raw: raw_bucket.offset(self.capacity as int), - hashes_end: raw_bucket.hash, - elems_left: self.size, - marker: marker::ContravariantLifetime, - } - } -} - -/// A raw iterator. The basis for some other iterators in this module. Although -/// this interface is safe, it's not used outside this module. -struct RawBuckets<'a, K, V> { - raw: RawBucket, - hashes_end: *mut u64, - marker: marker::ContravariantLifetime<'a>, -} - -impl<'a, K, V> Iterator> for RawBuckets<'a, K, V> { - fn next(&mut self) -> Option> { - while self.raw.hash != self.hashes_end { - unsafe { - // We are swapping out the pointer to a bucket and replacing - // it with the pointer to the next one. - let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); - if *prev.hash != EMPTY_BUCKET { - return Some(prev); - } - } - } - - None - } -} - -/// An iterator that moves out buckets in reverse order. 
It leaves the table -/// in an an inconsistent state and should only be used for dropping -/// the table's remaining entries. It's used in the implementation of Drop. -struct RevMoveBuckets<'a, K, V> { - raw: RawBucket, - hashes_end: *mut u64, - elems_left: uint, - marker: marker::ContravariantLifetime<'a>, -} - -impl<'a, K, V> Iterator<(K, V)> for RevMoveBuckets<'a, K, V> { - fn next(&mut self) -> Option<(K, V)> { - if self.elems_left == 0 { - return None; - } - - loop { - debug_assert!(self.raw.hash != self.hashes_end); - - unsafe { - self.raw = self.raw.offset(-1); - - if *self.raw.hash != EMPTY_BUCKET { - self.elems_left -= 1; - return Some(( - ptr::read(self.raw.key as *const K), - ptr::read(self.raw.val as *const V) - )); - } - } - } - } -} - -/// Iterator over shared references to entries in a table. -pub struct Entries<'a, K: 'a, V: 'a> { - iter: RawBuckets<'a, K, V>, - elems_left: uint, -} - -/// Iterator over mutable references to entries in a table. -pub struct MutEntries<'a, K: 'a, V: 'a> { - iter: RawBuckets<'a, K, V>, - elems_left: uint, -} - -/// Iterator over the entries in a table, consuming the table. 
-pub struct MoveEntries { - table: RawTable, - iter: RawBuckets<'static, K, V> -} - -impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { - fn next(&mut self) -> Option<(&'a K, &'a V)> { - self.iter.next().map(|bucket| { - self.elems_left -= 1; - unsafe { - (&*bucket.key, - &*bucket.val) - } - }) - } - - fn size_hint(&self) -> (uint, Option) { - (self.elems_left, Some(self.elems_left)) - } -} - -impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { - fn next(&mut self) -> Option<(&'a K, &'a mut V)> { - self.iter.next().map(|bucket| { - self.elems_left -= 1; - unsafe { - (&*bucket.key, - &mut *bucket.val) - } - }) - } - - fn size_hint(&self) -> (uint, Option) { - (self.elems_left, Some(self.elems_left)) - } -} - -impl Iterator<(SafeHash, K, V)> for MoveEntries { - fn next(&mut self) -> Option<(SafeHash, K, V)> { - self.iter.next().map(|bucket| { - self.table.size -= 1; - unsafe { - ( - SafeHash { - hash: *bucket.hash, - }, - ptr::read(bucket.key as *const K), - ptr::read(bucket.val as *const V) - ) - } - }) - } - - fn size_hint(&self) -> (uint, Option) { - let size = self.table.size(); - (size, Some(size)) - } -} - -impl Clone for RawTable { - fn clone(&self) -> RawTable { - unsafe { - let mut new_ht = RawTable::new_uninitialized(self.capacity()); - - { - let cap = self.capacity(); - let mut new_buckets = Bucket::first(&mut new_ht); - let mut buckets = Bucket::first(self); - while buckets.index() != cap { - match buckets.peek() { - Full(full) => { - let (h, k, v) = { - let (k, v) = full.read(); - (full.hash(), k.clone(), v.clone()) - }; - *new_buckets.raw.hash = h.inspect(); - ptr::write(new_buckets.raw.key, k); - ptr::write(new_buckets.raw.val, v); - } - Empty(..) 
=> { - *new_buckets.raw.hash = EMPTY_BUCKET; - } - } - new_buckets.next(); - buckets.next(); - } - }; - - new_ht.size = self.size(); - - new_ht - } - } -} - -#[unsafe_destructor] -impl Drop for RawTable { - fn drop(&mut self) { - if self.hashes.is_null() { - return; - } - // This is done in reverse because we've likely partially taken - // some elements out with `.into_iter()` from the front. - // Check if the size is 0, so we don't do a useless scan when - // dropping empty tables such as on resize. - // Also avoid double drop of elements that have been already moved out. - unsafe { - for _ in self.rev_move_buckets() {} - } - - let hashes_size = self.capacity * size_of::(); - let keys_size = self.capacity * size_of::(); - let vals_size = self.capacity * size_of::(); - let (align, _, size) = calculate_allocation(hashes_size, min_align_of::(), - keys_size, min_align_of::(), - vals_size, min_align_of::()); - - unsafe { - deallocate(self.hashes as *mut u8, size, align); - // Remember how everything was allocated out of one buffer - // during initialization? We only need one call to free here. - } - } -} diff --git a/src/libstd/collections/mod.rs b/src/libstd/collections/mod.rs index be9e22ee9d1..13486d4b8f8 100644 --- a/src/libstd/collections/mod.rs +++ b/src/libstd/collections/mod.rs @@ -24,9 +24,9 @@ //! Rust's collections can be grouped into four major categories: //! //! * Sequences: `Vec`, `RingBuf`, `DList`, `BitV` -//! * Maps: `HashMap`, `BTreeMap`, `TreeMap`, `TrieMap`, `SmallIntMap`, `LruCache` +//! * Maps: `HashMap`, `BTreeMap`, `TreeMap`, `TrieMap`, `VecMap`, `LruCache` //! * Sets: `HashSet`, `BTreeSet`, `TreeSet`, `TrieSet`, `BitVSet`, `EnumSet` -//! * Misc: `PriorityQueue` +//! * Misc: `BinaryHeap` //! //! # When Should You Use Which Collection? //! @@ -74,7 +74,7 @@ //! * You want a `HashMap`, but with many potentially large `uint` keys. //! * You want a `BTreeMap`, but with potentially large `uint` keys. //! -//! ### Use a `SmallIntMap` when: +//! 
### Use a `VecMap` when: //! * You want a `HashMap` but with known to be small `uint` keys. //! * You want a `BTreeMap`, but with known to be small `uint` keys. //! @@ -88,12 +88,12 @@ //! * You want a bitvector. //! //! ### Use a `BitVSet` when: -//! * You want a `SmallIntSet`. +//! * You want a `VecSet`. //! //! ### Use an `EnumSet` when: //! * You want a C-like enum, stored in a single `uint`. //! -//! ### Use a `PriorityQueue` when: +//! ### Use a `BinaryHeap` when: //! * You want to store a bunch of elements, but only ever want to process the "biggest" //! or "most important" one at any given time. //! * You want a priority queue. @@ -266,7 +266,7 @@ //! #### Counting the number of times each character in a string occurs //! //! ``` -//! use std::collections::btree::{BTreeMap, Occupied, Vacant}; +//! use std::collections::btree_map::{BTreeMap, Occupied, Vacant}; //! //! let mut count = BTreeMap::new(); //! let message = "she sells sea shells by the sea shore"; @@ -293,7 +293,7 @@ //! #### Tracking the inebriation of customers at a bar //! //! ``` -//! use std::collections::btree::{BTreeMap, Occupied, Vacant}; +//! use std::collections::btree_map::{BTreeMap, Occupied, Vacant}; //! //! // A client of the bar. They have an id and a blood alcohol level. //! 
struct Person { id: u32, blood_alcohol: f32 }; @@ -328,14 +328,27 @@ #![experimental] -pub use core_collections::{Bitv, BitvSet, BTreeMap, BTreeSet, DList, EnumSet}; -pub use core_collections::{PriorityQueue, RingBuf, SmallIntMap}; -pub use core_collections::{TreeMap, TreeSet, TrieMap, TrieSet}; -pub use core_collections::{bitv, btree, dlist, enum_set}; -pub use core_collections::{priority_queue, ringbuf, smallintmap, treemap, trie}; +pub use core_collections::{BinaryHeap, Bitv, BitvSet, BTreeMap, BTreeSet}; +pub use core_collections::{DList, EnumSet, RingBuf}; +pub use core_collections::{TreeMap, TreeSet, TrieMap, TrieSet, VecMap}; -pub use self::hashmap::{HashMap, HashSet}; +pub use core_collections::{binary_heap, bitv, bitv_set, btree_map, btree_set, dlist, enum_set}; +pub use core_collections::{ring_buf, tree_map, tree_set, trie_map, trie_set, vec_map}; + +pub use self::hash_map::HashMap; +pub use self::hash_set::HashSet; pub use self::lru_cache::LruCache; -pub mod hashmap; +mod hash; + +pub mod hash_map { + //! A hashmap + pub use super::hash::map::*; +} + +pub mod hash_set { + //! A hashset + pub use super::hash::set::*; +} + pub mod lru_cache; -- cgit 1.4.1-3-g733a5