about summary refs log tree commit diff
path: root/src/libcore
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2013-03-04 08:21:47 -0800
committerbors <bors@rust-lang.org>2013-03-04 08:21:47 -0800
commitc639a78dc4a9be4e5db0265cf8e73be28ce922f6 (patch)
treef22ed3e6def026597e34b62efc9eaf5b91a7f12b /src/libcore
parentedcebac1ccdbc353fe3bc68892137584258ff1ee (diff)
parenta4d22635e1453d77e90c4335f5c2e1f62e3185eb (diff)
downloadrust-c639a78dc4a9be4e5db0265cf8e73be28ce922f6.tar.gz
rust-c639a78dc4a9be4e5db0265cf8e73be28ce922f6.zip
auto merge of #5205 : thestinger/rust/radix, r=graydon
This is an implementation of a map and set for integer keys. It's an ordered container (by byte order, which is sorted order for integers and byte strings when done in the right direction) with O(1) worst-case lookup, removal and insertion. There's no rebalancing or rehashing so it's actually O(1) without amortizing any costs.

The fanout can be adjusted in multiples of 2 from 2-ary through 256-ary, but it's hardcoded at 16-ary because there isn't a way to expose that in the type system yet. To keep things simple, it also only allows `uint` keys, but later I'll expand it to all the built-in integer types and byte arrays.

There's quite a bit of room for performance improvement, along with the boost that will come with dropping the headers on `Owned` `~` and getting rid of the overhead from the stack switches to the allocator. It currently does suffix compression for a single node and then splits into two n-ary trie nodes, which could be replaced with an array for at least 4-8 suffixes before splitting it. There's also the option of doing path compression, which may be a good or a bad idea and depends a lot on the data stored.

I want to share the test suite with the other maps so that's why I haven't duplicated all of the existing integer key tests in this file. I'll send in another pull request to deal with that.

Current benchmark numbers against the other map types:

    TreeMap:
     Sequential integers:
      insert: 0.798295
      search: 0.188931
      remove: 0.435923
     Random integers:
      insert: 1.557661
      search: 0.758325
      remove: 1.720527

    LinearMap:
     Sequential integers:
      insert: 0.272338
      search: 0.141179
      remove: 0.190273
     Random integers:
      insert: 0.293588
      search: 0.162677
      remove: 0.206142

    TrieMap:
     Sequential integers:
      insert: 0.0901
      search: 0.012223
      remove: 0.084139
     Random integers:
      insert: 0.392719
      search: 0.261632
      remove: 0.470401

@graydon is using an earlier version of this for the garbage collection implementation, so that's why I added this to libcore. I left out the `next` and `prev` methods *for now* because I just wanted the essentials first.
Diffstat (limited to 'src/libcore')
-rw-r--r--src/libcore/core.rc1
-rw-r--r--src/libcore/trie.rs369
2 files changed, 370 insertions, 0 deletions
diff --git a/src/libcore/core.rc b/src/libcore/core.rc
index 19042195573..010e3f8655b 100644
--- a/src/libcore/core.rc
+++ b/src/libcore/core.rc
@@ -144,6 +144,7 @@ pub mod dlist;
 pub mod dlist_iter;
 pub mod hashmap;
 pub mod cell;
+pub mod trie;
 
 
 /* Tasks and communication */
diff --git a/src/libcore/trie.rs b/src/libcore/trie.rs
new file mode 100644
index 00000000000..c8faccf28f2
--- /dev/null
+++ b/src/libcore/trie.rs
@@ -0,0 +1,369 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! A radix trie for storing integers in sorted order
+
+use prelude::*;
+
+// FIXME: #3469: need to manually update TrieNode when SHIFT changes
+const SHIFT: uint = 4;
+const SIZE: uint = 1 << SHIFT;
+const MASK: uint = SIZE - 1;
+
+enum Child<T> {
+    Internal(~TrieNode<T>),
+    External(uint, T),
+    Nothing
+}
+
+pub struct TrieMap<T> {
+    priv root: TrieNode<T>,
+    priv length: uint
+}
+
+impl<T> BaseIter<(uint, &T)> for TrieMap<T> {
+    /// Visit all key-value pairs in order
+    #[inline(always)]
+    pure fn each(&self, f: fn(&(uint, &self/T)) -> bool) {
+        self.root.each(f)
+    }
+    #[inline(always)]
+    pure fn size_hint(&self) -> Option<uint> { Some(self.len()) }
+}
+
+impl<T> ReverseIter<(uint, &T)> for TrieMap<T> {
+    /// Visit all key-value pairs in reverse order
+    #[inline(always)]
+    pure fn each_reverse(&self, f: fn(&(uint, &self/T)) -> bool) {
+        self.root.each_reverse(f)
+    }
+}
+
+impl<T> Container for TrieMap<T> {
+    /// Return the number of elements in the map
+    #[inline(always)]
+    pure fn len(&self) -> uint { self.length }
+
+    /// Return true if the map contains no elements
+    #[inline(always)]
+    pure fn is_empty(&self) -> bool { self.len() == 0 }
+}
+
+impl<T: Copy> Mutable for TrieMap<T> {
+    /// Clear the map, removing all values.
+    #[inline(always)]
+    fn clear(&mut self) {
+        self.root = TrieNode::new();
+        self.length = 0;
+    }
+}
+
+impl<T: Copy> Map<uint, T> for TrieMap<T> {
+    /// Return true if the map contains a value for the specified key
+    #[inline(always)]
+    pure fn contains_key(&self, key: &uint) -> bool {
+        self.find(key).is_some()
+    }
+
+    /// Visit all keys in order
+    #[inline(always)]
+    pure fn each_key(&self, f: fn(&uint) -> bool) {
+        self.each(|&(k, _)| f(&k))
+    }
+
+    /// Visit all values in order
+    #[inline(always)]
+    pure fn each_value(&self, f: fn(&T) -> bool) { self.each(|&(_, v)| f(v)) }
+
+    /// Return the value corresponding to the key in the map
+    #[inline(hint)]
+    pure fn find(&self, key: &uint) -> Option<&self/T> {
+        let mut node: &self/TrieNode<T> = &self.root;
+        let mut idx = 0;
+        loop {
+            match node.children[chunk(*key, idx)] {
+              Internal(ref x) => node = &**x,
+              External(stored, ref value) => {
+                if stored == *key {
+                    return Some(value)
+                } else {
+                    return None
+                }
+              }
+              Nothing => return None
+            }
+            idx += 1;
+        }
+    }
+
+    /// Insert a key-value pair into the map. An existing value for a
+    /// key is replaced by the new value. Return true if the key did
+    /// not already exist in the map.
+    #[inline(always)]
+    fn insert(&mut self, key: uint, value: T) -> bool {
+        let ret = insert(&mut self.root.count,
+                         &mut self.root.children[chunk(key, 0)],
+                         key, value, 1);
+        if ret { self.length += 1 }
+        ret
+    }
+
+    /// Remove a key-value pair from the map. Return true if the key
+    /// was present in the map, otherwise false.
+    #[inline(always)]
+    fn remove(&mut self, key: &uint) -> bool {
+        let ret = remove(&mut self.root.count,
+                         &mut self.root.children[chunk(*key, 0)],
+                         *key, 1);
+        if ret { self.length -= 1 }
+        ret
+    }
+}
+
+impl<T: Copy> TrieMap<T> {
+    #[inline(always)]
+    static pure fn new() -> TrieMap<T> {
+        TrieMap{root: TrieNode::new(), length: 0}
+    }
+}
+
+impl<T> TrieMap<T> {
+    /// Visit all keys in reverse order
+    #[inline(always)]
+    pure fn each_key_reverse(&self, f: fn(&uint) -> bool) {
+        self.each_reverse(|&(k, _)| f(&k))
+    }
+
+    /// Visit all values in reverse order
+    #[inline(always)]
+    pure fn each_value_reverse(&self, f: fn(&T) -> bool) {
+        self.each_reverse(|&(_, v)| f(v))
+    }
+
+    /// Iterate over the map and mutate the contained values
+    fn mutate_values(&mut self, f: fn(uint, &mut T) -> bool) {
+        self.root.mutate_values(f)
+    }
+}
+
+pub struct TrieSet {
+    priv map: TrieMap<()>
+}
+
+impl BaseIter<uint> for TrieSet {
+    /// Visit all values in order
+    pure fn each(&self, f: fn(&uint) -> bool) { self.map.each_key(f) }
+    pure fn size_hint(&self) -> Option<uint> { Some(self.len()) }
+}
+
+impl ReverseIter<uint> for TrieSet {
+    /// Visit all values in reverse order
+    pure fn each_reverse(&self, f: fn(&uint) -> bool) {
+        self.map.each_key_reverse(f)
+    }
+}
+
+impl Container for TrieSet {
+    /// Return the number of elements in the set
+    #[inline(always)]
+    pure fn len(&self) -> uint { self.map.len() }
+
+    /// Return true if the set contains no elements
+    #[inline(always)]
+    pure fn is_empty(&self) -> bool { self.map.is_empty() }
+}
+
+impl Mutable for TrieSet {
+    /// Clear the set, removing all values.
+    #[inline(always)]
+    fn clear(&mut self) { self.map.clear() }
+}
+
+impl TrieSet {
+    /// Return true if the set contains a value
+    #[inline(always)]
+    pure fn contains(&self, value: &uint) -> bool {
+        self.map.contains_key(value)
+    }
+
+    /// Add a value to the set. Return true if the value was not already
+    /// present in the set.
+    #[inline(always)]
+    fn insert(&mut self, value: uint) -> bool { self.map.insert(value, ()) }
+
+    /// Remove a value from the set. Return true if the value was
+    /// present in the set.
+    #[inline(always)]
+    fn remove(&mut self, value: &uint) -> bool { self.map.remove(value) }
+}
+
+struct TrieNode<T> {
+    count: uint,
+    children: [Child<T> * 16] // FIXME: #3469: can't use the SIZE constant yet
+}
+
+impl<T: Copy> TrieNode<T> {
+    #[inline(always)]
+    static pure fn new() -> TrieNode<T> {
+        TrieNode{count: 0, children: [Nothing, ..SIZE]}
+    }
+}
+
+impl<T> TrieNode<T> {
+    pure fn each(&self, f: fn(&(uint, &self/T)) -> bool) {
+        for uint::range(0, self.children.len()) |idx| {
+            match self.children[idx] {
+                Internal(ref x) => x.each(f),
+                External(k, ref v) => if !f(&(k, v)) { return },
+                Nothing => ()
+            }
+        }
+    }
+
+    pure fn each_reverse(&self, f: fn(&(uint, &self/T)) -> bool) {
+        for uint::range_rev(self.children.len(), 0) |idx| {
+            match self.children[idx - 1] {
+                Internal(ref x) => x.each(f),
+                External(k, ref v) => if !f(&(k, v)) { return },
+                Nothing => ()
+            }
+        }
+    }
+
+    fn mutate_values(&mut self, f: fn(uint, &mut T) -> bool) {
+        for vec::each_mut(self.children) |child| {
+            match *child {
+                Internal(ref mut x) => x.mutate_values(f),
+                External(k, ref mut v) => if !f(k, v) { return },
+                Nothing => ()
+            }
+        }
+    }
+}
+
+// if this was done via a trait, the key could be generic
+#[inline(always)]
+pure fn chunk(n: uint, idx: uint) -> uint {
+    let real_idx = uint::bytes - 1 - idx;
+    (n >> (SHIFT * real_idx)) & MASK
+}
+
+fn insert<T: Copy>(count: &mut uint, child: &mut Child<T>, key: uint,
+                   value: T, idx: uint) -> bool {
+    match *child {
+      External(stored_key, stored_value) => {
+          if stored_key == key {
+              false // already in the trie
+          } else {
+              // conflict - split the node
+              let mut new = ~TrieNode::new();
+              insert(&mut new.count,
+                     &mut new.children[chunk(stored_key, idx)],
+                     stored_key, stored_value, idx + 1);
+              insert(&mut new.count, &mut new.children[chunk(key, idx)], key,
+                     value, idx + 1);
+              *child = Internal(new);
+              true
+          }
+      }
+      Internal(ref mut x) => {
+        insert(&mut x.count, &mut x.children[chunk(key, idx)], key, value,
+               idx + 1)
+      }
+      Nothing => {
+        *count += 1;
+        *child = External(key, value);
+        true
+      }
+    }
+}
+
+fn remove<T>(count: &mut uint, child: &mut Child<T>, key: uint,
+             idx: uint) -> bool {
+    let (ret, this) = match *child {
+      External(stored, _) => {
+          if stored == key { (true, true) } else { (false, false) }
+      }
+      Internal(ref mut x) => {
+          let ret = remove(&mut x.count, &mut x.children[chunk(key, idx)],
+                           key, idx + 1);
+          (ret, x.count == 0)
+      }
+      Nothing => (false, false)
+    };
+
+    if this {
+        *child = Nothing;
+        *count -= 1;
+    }
+    ret
+}
+
+#[cfg(test)]
+pub fn check_integrity<T>(trie: &TrieNode<T>) {
+    assert trie.count != 0;
+
+    let mut sum = 0;
+
+    for trie.children.each |x| {
+        match *x {
+          Nothing => (),
+          Internal(ref y) => {
+              check_integrity(&**y);
+              sum += 1
+          }
+          External(_, _) => { sum += 1 }
+        }
+    }
+
+    assert sum == trie.count;
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use uint;
+
+    #[test]
+    fn test_step() {
+        let mut trie = TrieMap::new();
+        let n = 300;
+
+        for uint::range_step(1, n, 2) |x| {
+            assert trie.insert(x, x + 1);
+            assert trie.contains_key(&x);
+            check_integrity(&trie.root);
+        }
+
+        for uint::range_step(0, n, 2) |x| {
+            assert !trie.contains_key(&x);
+            assert trie.insert(x, x + 1);
+            check_integrity(&trie.root);
+        }
+
+        for uint::range(0, n) |x| {
+            assert trie.contains_key(&x);
+            assert !trie.insert(x, x + 1);
+            check_integrity(&trie.root);
+        }
+
+        for uint::range_step(1, n, 2) |x| {
+            assert trie.remove(&x);
+            assert !trie.contains_key(&x);
+            check_integrity(&trie.root);
+        }
+
+        for uint::range_step(0, n, 2) |x| {
+            assert trie.contains_key(&x);
+            assert !trie.insert(x, x + 1);
+            check_integrity(&trie.root);
+        }
+    }
+}