about summary refs log tree commit diff
path: root/src/libcore/hashmap.rs
diff options
context:
space:
mode:
authorTim Chevalier <chevalier@alum.wellesley.edu>2013-01-23 20:10:47 -0800
committerTim Chevalier <chevalier@alum.wellesley.edu>2013-01-23 20:10:47 -0800
commita202dcccca09e49f65251f012f55c93f92c869a7 (patch)
tree7fc1ec5d206e1c862b72f7bdbcd697770cd4dfe5 /src/libcore/hashmap.rs
parent0e29e21281512f71d33a87995002bd438c5b42f1 (diff)
parentbba5520d62e0c662ec2e2ccabe725294e17e9738 (diff)
downloadrust-a202dcccca09e49f65251f012f55c93f92c869a7.tar.gz
rust-a202dcccca09e49f65251f012f55c93f92c869a7.zip
Merge pull request #4594 from thestinger/map
more work on the map trait and TreeMap/LinearMap
Diffstat (limited to 'src/libcore/hashmap.rs')
-rw-r--r--src/libcore/hashmap.rs649
1 files changed, 649 insertions, 0 deletions
diff --git a/src/libcore/hashmap.rs b/src/libcore/hashmap.rs
new file mode 100644
index 00000000000..40b80bddf84
--- /dev/null
+++ b/src/libcore/hashmap.rs
@@ -0,0 +1,649 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Sendable hash maps.
+
+// NB: transitionary, de-mode-ing.
+#[forbid(deprecated_mode)];
+#[forbid(deprecated_pattern)];
+
+use cmp::Eq;
+use hash::Hash;
+use prelude::*;
+use to_bytes::IterBytes;
+
+/// Open addressing with linear probing.
+pub mod linear {
+    use iter::BaseIter;
+    use container::{Container, Mutable, Map, Set};
+    use cmp::Eq;
+    use cmp;
+    use hash::Hash;
+    use kinds::Copy;
+    use option::{None, Option, Some};
+    use option;
+    use rand;
+    use to_bytes::IterBytes;
+    use uint;
+    use vec;
+
+    const INITIAL_CAPACITY: uint = 32u; // 2^5
+
+    struct Bucket<K:Eq Hash,V> {
+        hash: uint,
+        key: K,
+        value: V,
+    }
+    pub struct LinearMap<K:Eq Hash,V> {
+        k0: u64,
+        k1: u64,
+        resize_at: uint,
+        size: uint,
+        buckets: ~[Option<Bucket<K,V>>],
+    }
+
+    // FIXME(#3148) -- we could rewrite found_entry
+    // to have type option<&bucket<K,V>> which would be nifty
+    // However, that won't work until #3148 is fixed
+    enum SearchResult {
+        FoundEntry(uint), FoundHole(uint), TableFull
+    }
+
+    fn resize_at(capacity: uint) -> uint {
+        ((capacity as float) * 3. / 4.) as uint
+    }
+
+    pub fn LinearMap<K:Eq Hash,V>() -> LinearMap<K,V> {
+        linear_map_with_capacity(INITIAL_CAPACITY)
+    }
+
+    pub fn linear_map_with_capacity<K:Eq Hash,V>(
+        initial_capacity: uint) -> LinearMap<K,V> {
+        let r = rand::Rng();
+        linear_map_with_capacity_and_keys(r.gen_u64(), r.gen_u64(),
+                                          initial_capacity)
+    }
+
+    fn linear_map_with_capacity_and_keys<K:Eq Hash,V> (
+        k0: u64, k1: u64,
+        initial_capacity: uint) -> LinearMap<K,V> {
+        LinearMap {
+            k0: k0, k1: k1,
+            resize_at: resize_at(initial_capacity),
+            size: 0,
+            buckets: vec::from_fn(initial_capacity, |_i| None)
+        }
+    }
+
+    priv impl<K:Hash IterBytes Eq, V> LinearMap<K,V> {
+        #[inline(always)]
+        pure fn to_bucket(&const self,
+                          h: uint) -> uint {
+            // FIXME(#3041) borrow a more sophisticated technique here from
+            // Gecko, for example borrowing from Knuth, as Eich so
+            // colorfully argues for here:
+            // https://bugzilla.mozilla.org/show_bug.cgi?id=743107#c22
+            h % self.buckets.len()
+        }
+
+        #[inline(always)]
+        pure fn next_bucket(&const self,
+                            idx: uint,
+                            len_buckets: uint) -> uint {
+            let n = (idx + 1) % len_buckets;
+            debug!("next_bucket(%?, %?) = %?", idx, len_buckets, n);
+            return n;
+        }
+
+        #[inline(always)]
+        pure fn bucket_sequence(&const self,
+                                hash: uint,
+                                op: fn(uint) -> bool) -> uint {
+            let start_idx = self.to_bucket(hash);
+            let len_buckets = self.buckets.len();
+            let mut idx = start_idx;
+            loop {
+                if !op(idx) {
+                    return idx;
+                }
+                idx = self.next_bucket(idx, len_buckets);
+                if idx == start_idx {
+                    return start_idx;
+                }
+            }
+        }
+
+        #[inline(always)]
+        pure fn bucket_for_key(&const self,
+                               buckets: &[Option<Bucket<K,V>>],
+                               k: &K) -> SearchResult {
+            let hash = k.hash_keyed(self.k0, self.k1) as uint;
+            self.bucket_for_key_with_hash(buckets, hash, k)
+        }
+
+        #[inline(always)]
+        pure fn bucket_for_key_with_hash(&const self,
+                                         buckets: &[Option<Bucket<K,V>>],
+                                         hash: uint,
+                                         k: &K) -> SearchResult {
+            let _ = for self.bucket_sequence(hash) |i| {
+                match buckets[i] {
+                    Some(ref bkt) => if bkt.hash == hash && *k == bkt.key {
+                        return FoundEntry(i);
+                    },
+                    None => return FoundHole(i)
+                }
+            };
+            return TableFull;
+        }
+
+        /// Expands the capacity of the array and re-inserts each
+        /// of the existing buckets.
+        fn expand(&mut self) {
+            let old_capacity = self.buckets.len();
+            let new_capacity = old_capacity * 2;
+            self.resize_at = ((new_capacity as float) * 3.0 / 4.0) as uint;
+
+            let mut old_buckets = vec::from_fn(new_capacity, |_i| None);
+            self.buckets <-> old_buckets;
+
+            self.size = 0;
+            for uint::range(0, old_capacity) |i| {
+                let mut bucket = None;
+                bucket <-> old_buckets[i];
+                self.insert_opt_bucket(move bucket);
+            }
+        }
+
+        fn insert_opt_bucket(&mut self, bucket: Option<Bucket<K,V>>) {
+            match move bucket {
+                Some(Bucket {hash: move hash,
+                             key: move key,
+                             value: move value}) => {
+                    self.insert_internal(hash, move key, move value);
+                }
+                None => {}
+            }
+        }
+
+        /// Inserts the key value pair into the buckets.
+        /// Assumes that there will be a bucket.
+        /// True if there was no previous entry with that key
+        fn insert_internal(&mut self, hash: uint, k: K, v: V) -> bool {
+            match self.bucket_for_key_with_hash(self.buckets, hash, &k) {
+                TableFull => { fail ~"Internal logic error"; }
+                FoundHole(idx) => {
+                    debug!("insert fresh (%?->%?) at idx %?, hash %?",
+                           k, v, idx, hash);
+                    self.buckets[idx] = Some(Bucket {hash: hash,
+                                                     key: move k,
+                                                     value: move v});
+                    self.size += 1;
+                    true
+                }
+                FoundEntry(idx) => {
+                    debug!("insert overwrite (%?->%?) at idx %?, hash %?",
+                           k, v, idx, hash);
+                    self.buckets[idx] = Some(Bucket {hash: hash,
+                                                     key: move k,
+                                                     value: move v});
+                    false
+                }
+            }
+        }
+
+        fn pop_internal(&mut self, hash: uint, k: &K) -> Option<V> {
+            // Removing from an open-addressed hashtable
+            // is, well, painful.  The problem is that
+            // the entry may lie on the probe path for other
+            // entries, so removing it would make you think that
+            // those probe paths are empty.
+            //
+            // To address this we basically have to keep walking,
+            // re-inserting entries we find until we reach an empty
+            // bucket.  We know we will eventually reach one because
+            // we insert one ourselves at the beginning (the removed
+            // entry).
+            //
+            // I found this explanation elucidating:
+            // http://www.maths.lse.ac.uk/Courses/MA407/del-hash.pdf
+            let mut idx = match self.bucket_for_key_with_hash(self.buckets,
+                                                              hash, k) {
+                TableFull | FoundHole(_) => return None,
+                FoundEntry(idx) => idx
+            };
+
+            let len_buckets = self.buckets.len();
+            let mut bucket = None;
+            self.buckets[idx] <-> bucket;
+
+            let value = match move bucket {
+                None => None,
+                Some(move bucket) => {
+                    let Bucket { value: move value, _ } = move bucket;
+                    Some(move value)
+                },
+            };
+
+            idx = self.next_bucket(idx, len_buckets);
+            while self.buckets[idx].is_some() {
+                let mut bucket = None;
+                bucket <-> self.buckets[idx];
+                self.insert_opt_bucket(move bucket);
+                idx = self.next_bucket(idx, len_buckets);
+            }
+            self.size -= 1;
+
+            move value
+
+        }
+
+        fn search(&self,
+                  hash: uint,
+                  op: fn(x: &Option<Bucket<K,V>>) -> bool) {
+            let _ = self.bucket_sequence(hash, |i| op(&self.buckets[i]));
+        }
+    }
+
+    impl <K: Hash IterBytes Eq, V> LinearMap<K, V>: Container {
+        /// Return the number of elements in the map
+        pure fn len(&self) -> uint { self.size }
+
+        /// Return true if the map contains no elements
+        pure fn is_empty(&self) -> bool { self.len() == 0 }
+    }
+
+    impl <K: Hash IterBytes Eq, V> LinearMap<K, V>: Mutable {
+        /// Clear the map, removing all key-value pairs.
+        fn clear(&mut self) {
+            for uint::range(0, self.buckets.len()) |idx| {
+                self.buckets[idx] = None;
+            }
+            self.size = 0;
+        }
+    }
+
+    impl <K: Hash IterBytes Eq, V> LinearMap<K, V>: Map<K, V> {
+        /// Return true if the map contains a value for the specified key
+        pure fn contains_key(&self, k: &K) -> bool {
+            match self.bucket_for_key(self.buckets, k) {
+                FoundEntry(_) => {true}
+                TableFull | FoundHole(_) => {false}
+            }
+        }
+
+        /// Visit all key-value pairs
+        pure fn each(&self, blk: fn(k: &K, v: &V) -> bool) {
+            for vec::each(self.buckets) |slot| {
+                let mut broke = false;
+                do slot.iter |bucket| {
+                    if !blk(&bucket.key, &bucket.value) {
+                        broke = true; // FIXME(#3064) just write "break;"
+                    }
+                }
+                if broke { break; }
+            }
+        }
+
+        /// Visit all keys
+        pure fn each_key(&self, blk: fn(k: &K) -> bool) {
+            self.each(|k, _v| blk(k))
+        }
+
+        /// Visit all values
+        pure fn each_value(&self, blk: fn(v: &V) -> bool) {
+            self.each(|_k, v| blk(v))
+        }
+
+        /// Return the value corresponding to the key in the map
+        pure fn find(&self, k: &K) -> Option<&self/V> {
+            match self.bucket_for_key(self.buckets, k) {
+                FoundEntry(idx) => {
+                    match self.buckets[idx] {
+                        Some(ref bkt) => {
+                            // FIXME(#3148)---should be inferred
+                            let bkt: &self/Bucket<K,V> = bkt;
+                            Some(&bkt.value)
+                        }
+                        None => {
+                            fail ~"LinearMap::find: internal logic error"
+                        }
+                    }
+                }
+                TableFull | FoundHole(_) => {
+                    None
+                }
+            }
+        }
+
+        /// Insert a key-value pair into the map. An existing value for a
+        /// key is replaced by the new value. Return true if the key did
+        /// not already exist in the map.
+        fn insert(&mut self, k: K, v: V) -> bool {
+            if self.size >= self.resize_at {
+                // n.b.: We could also do this after searching, so
+                // that we do not resize if this call to insert is
+                // simply going to update a key in place.  My sense
+                // though is that it's worse to have to search through
+                // buckets to find the right spot twice than to just
+                // resize in this corner case.
+                self.expand();
+            }
+
+            let hash = k.hash_keyed(self.k0, self.k1) as uint;
+            self.insert_internal(hash, move k, move v)
+        }
+
+        /// Remove a key-value pair from the map. Return true if the key
+        /// was present in the map, otherwise false.
+        fn remove(&mut self, k: &K) -> bool {
+            match self.pop(k) {
+                Some(_) => true,
+                None => false,
+            }
+        }
+    }
+
+    impl<K:Hash IterBytes Eq,V> LinearMap<K,V> {
+        static fn new() -> LinearMap<K, V> {
+            linear_map_with_capacity(INITIAL_CAPACITY)
+        }
+
+        fn pop(&mut self, k: &K) -> Option<V> {
+            let hash = k.hash_keyed(self.k0, self.k1) as uint;
+            self.pop_internal(hash, k)
+        }
+
+        fn swap(&mut self, k: K, v: V) -> Option<V> {
+            // this could be faster.
+            let hash = k.hash_keyed(self.k0, self.k1) as uint;
+            let old_value = self.pop_internal(hash, &k);
+
+            if self.size >= self.resize_at {
+                // n.b.: We could also do this after searching, so
+                // that we do not resize if this call to insert is
+                // simply going to update a key in place.  My sense
+                // though is that it's worse to have to search through
+                // buckets to find the right spot twice than to just
+                // resize in this corner case.
+                self.expand();
+            }
+
+            self.insert_internal(hash, move k, move v);
+
+            move old_value
+        }
+
+        fn consume(&mut self, f: fn(K, V)) {
+            let mut buckets = ~[];
+            self.buckets <-> buckets;
+            self.size = 0;
+
+            do vec::consume(move buckets) |_i, bucket| {
+                match move bucket {
+                    None => { },
+                    Some(move bucket) => {
+                        let Bucket {
+                            key: move key,
+                            value: move value,
+                            _
+                        } = move bucket;
+                        f(move key, move value)
+                    }
+                }
+            }
+        }
+
+        pure fn get(&self, k: &K) -> &self/V {
+            match self.find(k) {
+                Some(v) => v,
+                None => fail fmt!("No entry found for key: %?", k),
+            }
+        }
+    }
+
+    impl<K:Hash IterBytes Eq, V: Copy> LinearMap<K, V> {
+        pure fn find_copy(&const self, k: &K) -> Option<V> {
+            match self.bucket_for_key(self.buckets, k) {
+                FoundEntry(idx) => {
+                    // FIXME (#3148): Once we rewrite found_entry, this
+                    // failure case won't be necessary
+                    match self.buckets[idx] {
+                        Some(Bucket {value: copy value, _}) => {Some(value)}
+                        None => fail ~"LinearMap::find: internal logic error"
+                    }
+                }
+                TableFull | FoundHole(_) => {
+                    None
+                }
+            }
+        }
+    }
+
+    impl<K:Hash IterBytes Eq, V: Eq> LinearMap<K, V>: Eq {
+        pure fn eq(&self, other: &LinearMap<K, V>) -> bool {
+            if self.len() != other.len() { return false; }
+
+            for self.each |key, value| {
+                match other.find(key) {
+                    None => return false,
+                    Some(v) => if value != v { return false },
+                }
+            }
+
+            return true;
+        }
+
+        pure fn ne(&self, other: &LinearMap<K, V>) -> bool {
+            !self.eq(other)
+        }
+    }
+
+    pub struct LinearSet<T: Hash IterBytes Eq> {
+        priv map: LinearMap<T, ()>
+    }
+
+    impl <T: Hash IterBytes Eq> LinearSet<T>: BaseIter<T> {
+        /// Visit all values in order
+        pure fn each(&self, f: fn(&T) -> bool) { self.map.each_key(f) }
+        pure fn size_hint(&self) -> Option<uint> { Some(self.len()) }
+    }
+
+    impl <T: Hash IterBytes Eq> LinearSet<T>: Eq {
+        pure fn eq(&self, other: &LinearSet<T>) -> bool {
+            self.map == other.map
+        }
+        pure fn ne(&self, other: &LinearSet<T>) -> bool {
+            self.map != other.map
+        }
+    }
+
+    impl <T: Hash IterBytes Eq> LinearSet<T>: Container {
+        /// Return the number of elements in the set
+        pure fn len(&self) -> uint { self.map.len() }
+
+        /// Return true if the set contains no elements
+        pure fn is_empty(&self) -> bool { self.map.is_empty() }
+    }
+
+    impl <T: Hash IterBytes Eq> LinearSet<T>: Mutable {
+        /// Clear the set, removing all values.
+        fn clear(&mut self) { self.map.clear() }
+    }
+
+    impl <T: Hash IterBytes Eq> LinearSet<T>: Set<T> {
+        /// Return true if the set contains a value
+        pure fn contains(&self, value: &T) -> bool {
+            self.map.contains_key(value)
+        }
+
+        /// Add a value to the set. Return true if the value was not already
+        /// present in the set.
+        fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) }
+
+        /// Remove a value from the set. Return true if the value was
+        /// present in the set.
+        fn remove(&mut self, value: &T) -> bool { self.map.remove(value) }
+    }
+
+    pub impl <T: Hash IterBytes Eq> LinearSet<T> {
+        /// Create an empty LinearSet
+        static fn new() -> LinearSet<T> { LinearSet{map: LinearMap()} }
+    }
+}
+
+#[test]
+pub mod test {
+    use option::{None, Some};
+    use hashmap::linear::LinearMap;
+    use hashmap::linear;
+    use uint;
+
+    #[test]
+    pub fn inserts() {
+        let mut m = LinearMap::new();
+        assert m.insert(1, 2);
+        assert m.insert(2, 4);
+        assert *m.get(&1) == 2;
+        assert *m.get(&2) == 4;
+    }
+
+    #[test]
+    pub fn overwrite() {
+        let mut m = LinearMap::new();
+        assert m.insert(1, 2);
+        assert *m.get(&1) == 2;
+        assert !m.insert(1, 3);
+        assert *m.get(&1) == 3;
+    }
+
+    #[test]
+    pub fn conflicts() {
+        let mut m = linear::linear_map_with_capacity(4);
+        assert m.insert(1, 2);
+        assert m.insert(5, 3);
+        assert m.insert(9, 4);
+        assert *m.get(&9) == 4;
+        assert *m.get(&5) == 3;
+        assert *m.get(&1) == 2;
+    }
+
+    #[test]
+    pub fn conflict_remove() {
+        let mut m = linear::linear_map_with_capacity(4);
+        assert m.insert(1, 2);
+        assert m.insert(5, 3);
+        assert m.insert(9, 4);
+        assert m.remove(&1);
+        assert *m.get(&9) == 4;
+        assert *m.get(&5) == 3;
+    }
+
+    #[test]
+    pub fn empty() {
+        let mut m = linear::linear_map_with_capacity(4);
+        assert m.insert(1, 2);
+        assert !m.is_empty();
+        assert m.remove(&1);
+        assert m.is_empty();
+    }
+
+    #[test]
+    pub fn pops() {
+        let mut m = LinearMap::new();
+        m.insert(1, 2);
+        assert m.pop(&1) == Some(2);
+        assert m.pop(&1) == None;
+    }
+
+    #[test]
+    pub fn swaps() {
+        let mut m = LinearMap::new();
+        assert m.swap(1, 2) == None;
+        assert m.swap(1, 3) == Some(2);
+        assert m.swap(1, 4) == Some(3);
+    }
+
+    #[test]
+    pub fn consumes() {
+        let mut m = LinearMap::new();
+        assert m.insert(1, 2);
+        assert m.insert(2, 3);
+        let mut m2 = LinearMap::new();
+        do m.consume |k, v| {
+            m2.insert(k, v);
+        }
+        assert m.len() == 0;
+        assert m2.len() == 2;
+        assert m2.find_copy(&1) == Some(2);
+        assert m2.find_copy(&2) == Some(3);
+    }
+
+    #[test]
+    pub fn iterate() {
+        let mut m = linear::linear_map_with_capacity(4);
+        for uint::range(0, 32) |i| {
+            assert m.insert(i, i*2);
+        }
+        let mut observed = 0;
+        for m.each |k, v| {
+            assert *v == *k * 2;
+            observed |= (1 << *k);
+        }
+        assert observed == 0xFFFF_FFFF;
+    }
+
+    #[test]
+    pub fn find() {
+        let mut m = LinearMap::new();
+        assert m.find(&1).is_none();
+        m.insert(1, 2);
+        match m.find(&1) {
+            None => fail,
+            Some(v) => assert *v == 2
+        }
+    }
+
+    #[test]
+    pub fn test_eq() {
+        let mut m1 = LinearMap::new();
+        m1.insert(1, 2);
+        m1.insert(2, 3);
+        m1.insert(3, 4);
+
+        let mut m2 = LinearMap::new();
+        m2.insert(1, 2);
+        m2.insert(2, 3);
+
+        assert m1 != m2;
+
+        m2.insert(3, 4);
+
+        assert m1 == m2;
+    }
+
+    #[test]
+    pub fn test_expand() {
+        let mut m = LinearMap::new();
+
+        assert m.len() == 0;
+        assert m.is_empty();
+
+        let mut i = 0u;
+        let old_resize_at = m.resize_at;
+        while old_resize_at == m.resize_at {
+            m.insert(i, i);
+            i += 1;
+        }
+
+        assert m.len() == i;
+        assert !m.is_empty();
+    }
+}