about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2016-04-22 17:54:30 -0700
committerbors <bors@rust-lang.org>2016-04-22 17:54:30 -0700
commit66ff163081685aa48bc59033eb5280052963a750 (patch)
tree78384c49fd9e157f4867d440773415f33923d18d /src
parentaf000a7bbffcaf5e75ff97b245fa5a413062acc1 (diff)
parent241a3e4689d3004daf9e1d36cec2235cbd301fbf (diff)
downloadrust-66ff163081685aa48bc59033eb5280052963a750.tar.gz
rust-66ff163081685aa48bc59033eb5280052963a750.zip
Auto merge of #32466 - jooert:btree_append, r=apasel422
Implement `append` for b-trees.

I have finally found time to revive #26227, this time only with an `append` implementation.

The algorithm implemented here is linear in the size of the two b-trees. It firsts creates
a `MergeIter` from the two b-trees and then builds a new b-tree by pushing
key-value pairs from the `MergeIter` into nodes at the right heights.

Three functions for stealing have been added to the implementation of `Handle` as
well as a getter for the height of a `NodeRef`.

The docs have been updated with performance information about `BTreeMap::append` and
the remark about B has been removed now that it is the same for all instances of `BTreeMap`.

cc @gereeter @Gankro @apasel422
Diffstat (limited to 'src')
-rw-r--r--src/libcollections/btree/map.rs189
-rw-r--r--src/libcollections/btree/node.rs139
-rw-r--r--src/libcollections/btree/set.rs35
-rw-r--r--src/libcollectionstest/btree/map.rs52
-rw-r--r--src/libcollectionstest/btree/set.rs24
-rw-r--r--src/libcollectionstest/lib.rs1
-rw-r--r--src/libstd/collections/mod.rs10
7 files changed, 420 insertions, 30 deletions
diff --git a/src/libcollections/btree/map.rs b/src/libcollections/btree/map.rs
index de40568fd67..20ef2738de1 100644
--- a/src/libcollections/btree/map.rs
+++ b/src/libcollections/btree/map.rs
@@ -11,7 +11,7 @@
 use core::cmp::Ordering;
 use core::fmt::Debug;
 use core::hash::{Hash, Hasher};
-use core::iter::FromIterator;
+use core::iter::{FromIterator, Peekable};
 use core::marker::PhantomData;
 use core::ops::Index;
 use core::{fmt, intrinsics, mem, ptr};
@@ -348,6 +348,12 @@ pub struct OccupiedEntry<'a, K: 'a, V: 'a> {
     _marker: PhantomData<&'a mut (K, V)>,
 }
 
+// An iterator for merging two sorted sequences into one
+struct MergeIter<K, V, I: Iterator<Item=(K, V)>> {
+    left: Peekable<I>,
+    right: Peekable<I>,
+}
+
 impl<K: Ord, V> BTreeMap<K, V> {
     /// Makes a new empty BTreeMap with a reasonable choice for B.
     ///
@@ -535,6 +541,62 @@ impl<K: Ord, V> BTreeMap<K, V> {
         }
     }
 
+    /// Moves all elements from `other` into `Self`, leaving `other` empty.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(btree_append)]
+    /// use std::collections::BTreeMap;
+    ///
+    /// let mut a = BTreeMap::new();
+    /// a.insert(1, "a");
+    /// a.insert(2, "b");
+    /// a.insert(3, "c");
+    ///
+    /// let mut b = BTreeMap::new();
+    /// b.insert(3, "d");
+    /// b.insert(4, "e");
+    /// b.insert(5, "f");
+    ///
+    /// a.append(&mut b);
+    ///
+    /// assert_eq!(a.len(), 5);
+    /// assert_eq!(b.len(), 0);
+    ///
+    /// assert_eq!(a[&1], "a");
+    /// assert_eq!(a[&2], "b");
+    /// assert_eq!(a[&3], "d");
+    /// assert_eq!(a[&4], "e");
+    /// assert_eq!(a[&5], "f");
+    /// ```
+    #[unstable(feature = "btree_append", reason = "recently added as part of collections reform 2",
+               issue = "19986")]
+    pub fn append(&mut self, other: &mut Self) {
+        // Do we have to append anything at all?
+        if other.len() == 0 {
+            return;
+        }
+
+        // We can just swap `self` and `other` if `self` is empty.
+        if self.len() == 0 {
+            mem::swap(self, other);
+            return;
+        }
+
+        // First, we merge `self` and `other` into a sorted sequence in linear time.
+        let self_iter = mem::replace(self, BTreeMap::new()).into_iter();
+        let other_iter = mem::replace(other, BTreeMap::new()).into_iter();
+        let iter = MergeIter {
+            left: self_iter.peekable(),
+            right: other_iter.peekable(),
+        };
+
+        // Second, we build a tree from the sorted sequence in linear time.
+        self.from_sorted_iter(iter);
+        self.fix_right_edge();
+    }
+
     /// Constructs a double-ended iterator over a sub-range of elements in the map, starting
     /// at min, and ending at max. If min is `Unbounded`, then it will be treated as "negative
     /// infinity", and if max is `Unbounded`, then it will be treated as "positive infinity".
@@ -724,6 +786,76 @@ impl<K: Ord, V> BTreeMap<K, V> {
             })
         }
     }
+
+    fn from_sorted_iter<I: Iterator<Item=(K, V)>>(&mut self, iter: I) {
+        let mut cur_node = last_leaf_edge(self.root.as_mut()).into_node();
+        // Iterate through all key-value pairs, pushing them into nodes at the right level.
+        for (key, value) in iter {
+            // Try to push key-value pair into the current leaf node.
+            if cur_node.len() < node::CAPACITY {
+                cur_node.push(key, value);
+            } else {
+                // No space left, go up and push there.
+                let mut open_node;
+                let mut test_node = cur_node.forget_type();
+                loop {
+                    match test_node.ascend() {
+                        Ok(parent) => {
+                            let parent = parent.into_node();
+                            if parent.len() < node::CAPACITY {
+                                // Found a node with space left, push here.
+                                open_node = parent;
+                                break;
+                            } else {
+                                // Go up again.
+                                test_node = parent.forget_type();
+                            }
+                        },
+                        Err(node) => {
+                            // We are at the top, create a new root node and push there.
+                            open_node = node.into_root_mut().push_level();
+                            break;
+                        },
+                    }
+                }
+
+                // Push key-value pair and new right subtree.
+                let tree_height = open_node.height() - 1;
+                let mut right_tree = node::Root::new_leaf();
+                for _ in 0..tree_height {
+                    right_tree.push_level();
+                }
+                open_node.push(key, value, right_tree);
+
+                // Go down to the right-most leaf again.
+                cur_node = last_leaf_edge(open_node.forget_type()).into_node();
+            }
+
+            self.length += 1;
+        }
+    }
+
+    fn fix_right_edge(&mut self) {
+        // Handle underfull nodes, start from the top.
+        let mut cur_node = self.root.as_mut();
+        while let Internal(internal) = cur_node.force() {
+            // Check if right-most child is underfull.
+            let mut last_edge = internal.last_edge();
+            let right_child_len = last_edge.reborrow().descend().len();
+            if right_child_len < node::CAPACITY / 2 {
+                // We need to steal.
+                let mut last_kv = match last_edge.left_kv() {
+                    Ok(left) => left,
+                    Err(_) => unreachable!(),
+                };
+                last_kv.bulk_steal_left(node::CAPACITY/2 - right_child_len);
+                last_edge = last_kv.right_edge();
+            }
+
+            // Go further down.
+            cur_node = last_edge.descend();
+        }
+    }
 }
 
 impl<'a, K: 'a, V: 'a> IntoIterator for &'a BTreeMap<K, V> {
@@ -1690,32 +1822,41 @@ fn handle_underfull_node<'a, K, V>(node: NodeRef<marker::Mut<'a>,
     };
 
     if handle.can_merge() {
-        return Merged(handle.merge().into_node());
+        Merged(handle.merge().into_node())
     } else {
-        unsafe {
-            let (k, v, edge) = if is_left {
-                handle.reborrow_mut().left_edge().descend().pop()
-            } else {
-                handle.reborrow_mut().right_edge().descend().pop_front()
-            };
+        if is_left {
+            handle.steal_left();
+        } else {
+            handle.steal_right();
+        }
+        Stole(handle.into_node())
+    }
+}
 
-            let k = mem::replace(handle.reborrow_mut().into_kv_mut().0, k);
-            let v = mem::replace(handle.reborrow_mut().into_kv_mut().1, v);
+impl<K: Ord, V, I: Iterator<Item=(K, V)>> Iterator for MergeIter<K, V, I> {
+    type Item = (K, V);
 
-            // FIXME: reuse cur_node?
-            if is_left {
-                match handle.reborrow_mut().right_edge().descend().force() {
-                    Leaf(mut leaf) => leaf.push_front(k, v),
-                    Internal(mut internal) => internal.push_front(k, v, edge.unwrap())
-                }
-            } else {
-                match handle.reborrow_mut().left_edge().descend().force() {
-                    Leaf(mut leaf) => leaf.push(k, v),
-                    Internal(mut internal) => internal.push(k, v, edge.unwrap())
-                }
-            }
-        }
+    fn next(&mut self) -> Option<(K, V)> {
+        let res = match (self.left.peek(), self.right.peek()) {
+            (Some(&(ref left_key, _)), Some(&(ref right_key, _))) => left_key.cmp(right_key),
+            (Some(_), None) => Ordering::Less,
+            (None, Some(_)) => Ordering::Greater,
+            (None, None) => return None,
+        };
 
-        return Stole(handle.into_node());
+        // Check which elements comes first and only advance the corresponding iterator.
+        // If two keys are equal, take the value from `right`.
+        match res {
+            Ordering::Less => {
+                self.left.next()
+            },
+            Ordering::Greater => {
+                self.right.next()
+            },
+            Ordering::Equal => {
+                self.left.next();
+                self.right.next()
+            },
+        }
     }
 }
diff --git a/src/libcollections/btree/node.rs b/src/libcollections/btree/node.rs
index ad34de9e3df..ca1cf6bcc50 100644
--- a/src/libcollections/btree/node.rs
+++ b/src/libcollections/btree/node.rs
@@ -328,6 +328,12 @@ impl<BorrowType, K, V, Type> NodeRef<BorrowType, K, V, Type> {
         self.as_leaf().len as usize
     }
 
+    /// Returns the height of this node in the whole tree. Zero height denotes the
+    /// leaf level.
+    pub fn height(&self) -> usize {
+        self.height
+    }
+
     /// Removes any static information about whether this node is a `Leaf` or an
     /// `Internal` node.
     pub fn forget_type(self) -> NodeRef<BorrowType, K, V, marker::LeafOrInternal> {
@@ -1233,6 +1239,139 @@ impl<'a, K, V> Handle<NodeRef<marker::Mut<'a>, K, V, marker::Internal>, marker::
             Handle::new_edge(self.node, self.idx)
         }
     }
+
+    /// This removes a key/value pair from the left child and replaces it with the key/value pair
+    /// pointed to by this handle while pushing the old key/value pair of this handle into the right
+    /// child.
+    pub fn steal_left(&mut self) {
+        unsafe {
+            let (k, v, edge) = self.reborrow_mut().left_edge().descend().pop();
+
+            let k = mem::replace(self.reborrow_mut().into_kv_mut().0, k);
+            let v = mem::replace(self.reborrow_mut().into_kv_mut().1, v);
+
+            match self.reborrow_mut().right_edge().descend().force() {
+                ForceResult::Leaf(mut leaf) => leaf.push_front(k, v),
+                ForceResult::Internal(mut internal) => internal.push_front(k, v, edge.unwrap())
+            }
+        }
+    }
+
+    /// This removes a key/value pair from the right child and replaces it with the key/value pair
+    /// pointed to by this handle while pushing the old key/value pair of this handle into the left
+    /// child.
+    pub fn steal_right(&mut self) {
+        unsafe {
+            let (k, v, edge) = self.reborrow_mut().right_edge().descend().pop_front();
+
+            let k = mem::replace(self.reborrow_mut().into_kv_mut().0, k);
+            let v = mem::replace(self.reborrow_mut().into_kv_mut().1, v);
+
+            match self.reborrow_mut().left_edge().descend().force() {
+                ForceResult::Leaf(mut leaf) => leaf.push(k, v),
+                ForceResult::Internal(mut internal) => internal.push(k, v, edge.unwrap())
+            }
+        }
+    }
+
+    /// This does stealing similar to `steal_left` but steals multiple elements at once.
+    pub fn bulk_steal_left(&mut self, n: usize) {
+        unsafe {
+            // Get raw pointers to left child's keys, values and edges.
+            let (left_len, left_k, left_v, left_e) = {
+                let mut left = self.reborrow_mut().left_edge().descend();
+
+                (left.len(),
+                 left.keys_mut().as_mut_ptr(),
+                 left.vals_mut().as_mut_ptr(),
+                 match left.force() {
+                     ForceResult::Leaf(_) => None,
+                     ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()),
+                 })
+            };
+
+            // Get raw pointers to right child's keys, values and edges.
+            let (right_len, right_k, right_v, right_e) = {
+                let mut right = self.reborrow_mut().right_edge().descend();
+
+                (right.len(),
+                 right.keys_mut().as_mut_ptr(),
+                 right.vals_mut().as_mut_ptr(),
+                 match right.force() {
+                     ForceResult::Leaf(_) => None,
+                     ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()),
+                 })
+            };
+
+            // Get raw pointers to parent's key and value.
+            let (parent_k, parent_v) = {
+                let kv = self.reborrow_mut().into_kv_mut();
+                (kv.0 as *mut K, kv.1 as *mut V)
+            };
+
+            // Make sure that we may steal safely.
+            debug_assert!(right_len + n <= CAPACITY);
+            debug_assert!(left_len >= n);
+
+            // Make room for stolen elements in right child.
+            ptr::copy(right_k,
+                      right_k.offset(n as isize),
+                      right_len);
+            ptr::copy(right_v,
+                      right_v.offset(n as isize),
+                      right_len);
+            if let Some(edges) = right_e {
+                ptr::copy(edges,
+                          edges.offset(n as isize),
+                          right_len+1);
+            }
+
+            // Move elements from the left child to the right one.
+            let left_ind = (left_len - n) as isize;
+            ptr::copy_nonoverlapping(left_k.offset(left_ind + 1),
+                                     right_k,
+                                     n - 1);
+            ptr::copy_nonoverlapping(left_v.offset(left_ind + 1),
+                                     right_v,
+                                     n - 1);
+            match (left_e, right_e) {
+                (Some(left), Some(right)) => {
+                    ptr::copy_nonoverlapping(left.offset(left_ind + 1),
+                                             right,
+                                             n);
+                },
+                (Some(_), None) => unreachable!(),
+                (None, Some(_)) => unreachable!(),
+                (None, None) => {},
+            }
+
+            // Copy parent key/value pair to right child.
+            ptr::copy_nonoverlapping(parent_k,
+                                     right_k.offset(n as isize - 1),
+                                     1);
+            ptr::copy_nonoverlapping(parent_v,
+                                     right_v.offset(n as isize - 1),
+                                     1);
+            // Copy left-most stolen pair to parent.
+            ptr::copy_nonoverlapping(left_k.offset(left_ind),
+                                     parent_k,
+                                     1);
+            ptr::copy_nonoverlapping(left_v.offset(left_ind),
+                                     parent_v,
+                                     1);
+
+            // Fix lengths of left and right child and parent pointers in children of the right
+            // child.
+            self.reborrow_mut().left_edge().descend().as_leaf_mut().len -= n as u16;
+            let mut right = self.reborrow_mut().right_edge().descend();
+            right.as_leaf_mut().len += n as u16;
+            if let ForceResult::Internal(mut node) = right.force() {
+                for i in 0..(right_len+n+1) {
+                    Handle::new_edge(node.reborrow_mut(), i as usize).correct_parent_link();
+                }
+            }
+        }
+    }
 }
 
 impl<BorrowType, K, V, HandleType>
diff --git a/src/libcollections/btree/set.rs b/src/libcollections/btree/set.rs
index e679381f223..5419d7a301a 100644
--- a/src/libcollections/btree/set.rs
+++ b/src/libcollections/btree/set.rs
@@ -545,6 +545,41 @@ impl<T: Ord> BTreeSet<T> {
     {
         Recover::take(&mut self.map, value)
     }
+
+    /// Moves all elements from `other` into `Self`, leaving `other` empty.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(btree_append)]
+    /// use std::collections::BTreeSet;
+    ///
+    /// let mut a = BTreeSet::new();
+    /// a.insert(1);
+    /// a.insert(2);
+    /// a.insert(3);
+    ///
+    /// let mut b = BTreeSet::new();
+    /// b.insert(3);
+    /// b.insert(4);
+    /// b.insert(5);
+    ///
+    /// a.append(&mut b);
+    ///
+    /// assert_eq!(a.len(), 5);
+    /// assert_eq!(b.len(), 0);
+    ///
+    /// assert!(a.contains(&1));
+    /// assert!(a.contains(&2));
+    /// assert!(a.contains(&3));
+    /// assert!(a.contains(&4));
+    /// assert!(a.contains(&5));
+    /// ```
+    #[unstable(feature = "btree_append", reason = "recently added as part of collections reform 2",
+               issue = "19986")]
+    pub fn append(&mut self, other: &mut Self) {
+        self.map.append(&mut other.map);
+    }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
diff --git a/src/libcollectionstest/btree/map.rs b/src/libcollectionstest/btree/map.rs
index 619bc189e6c..1858791776f 100644
--- a/src/libcollectionstest/btree/map.rs
+++ b/src/libcollectionstest/btree/map.rs
@@ -446,6 +446,58 @@ fn test_vacant_entry_key() {
     assert_eq!(a[key], value);
 }
 
+macro_rules! create_append_test {
+    ($name:ident, $len:expr) => {
+        #[test]
+        fn $name() {
+            let mut a = BTreeMap::new();
+            for i in 0..8 {
+                a.insert(i, i);
+            }
+
+            let mut b = BTreeMap::new();
+            for i in 5..$len {
+                b.insert(i, 2*i);
+            }
+
+            a.append(&mut b);
+
+            assert_eq!(a.len(), $len);
+            assert_eq!(b.len(), 0);
+
+            for i in 0..$len {
+                if i < 5 {
+                    assert_eq!(a[&i], i);
+                } else {
+                    assert_eq!(a[&i], 2*i);
+                }
+            }
+
+            assert_eq!(a.remove(&($len-1)), Some(2*($len-1)));
+            assert_eq!(a.insert($len-1, 20), None);
+        }
+    };
+}
+
+// These are mostly for testing the algorithm that "fixes" the right edge after insertion.
+// Single node.
+create_append_test!(test_append_9, 9);
+// Two leafs that don't need fixing.
+create_append_test!(test_append_17, 17);
+// Two leafs where the second one ends up underfull and needs stealing at the end.
+create_append_test!(test_append_14, 14);
+// Two leafs where the second one ends up empty because the insertion finished at the root.
+create_append_test!(test_append_12, 12);
+// Three levels; insertion finished at the root.
+create_append_test!(test_append_144, 144);
+// Three levels; insertion finished at leaf while there is an empty node on the second level.
+create_append_test!(test_append_145, 145);
+// Tests for several randomly chosen sizes.
+create_append_test!(test_append_170, 170);
+create_append_test!(test_append_181, 181);
+create_append_test!(test_append_239, 239);
+create_append_test!(test_append_1700, 1700);
+
 mod bench {
     use std::collections::BTreeMap;
     use std::__rand::{Rng, thread_rng};
diff --git a/src/libcollectionstest/btree/set.rs b/src/libcollectionstest/btree/set.rs
index 3928804a8ed..53ccfd5b4e2 100644
--- a/src/libcollectionstest/btree/set.rs
+++ b/src/libcollectionstest/btree/set.rs
@@ -265,3 +265,27 @@ fn test_variance() {
     fn into_iter<'new>(v: IntoIter<&'static str>) -> IntoIter<&'new str> { v }
     fn range<'a, 'new>(v: Range<'a, &'static str>) -> Range<'a, &'new str> { v }
 }
+
+#[test]
+fn test_append() {
+    let mut a = BTreeSet::new();
+    a.insert(1);
+    a.insert(2);
+    a.insert(3);
+
+    let mut b = BTreeSet::new();
+    b.insert(3);
+    b.insert(4);
+    b.insert(5);
+
+    a.append(&mut b);
+
+    assert_eq!(a.len(), 5);
+    assert_eq!(b.len(), 0);
+
+    assert_eq!(a.contains(&1), true);
+    assert_eq!(a.contains(&2), true);
+    assert_eq!(a.contains(&3), true);
+    assert_eq!(a.contains(&4), true);
+    assert_eq!(a.contains(&5), true);
+}
diff --git a/src/libcollectionstest/lib.rs b/src/libcollectionstest/lib.rs
index 056ac13585c..e4152b99d2c 100644
--- a/src/libcollectionstest/lib.rs
+++ b/src/libcollectionstest/lib.rs
@@ -13,6 +13,7 @@
 #![feature(binary_heap_extras)]
 #![feature(binary_heap_append)]
 #![feature(box_syntax)]
+#![feature(btree_append)]
 #![feature(btree_range)]
 #![feature(collections)]
 #![feature(collections_bound)]
diff --git a/src/libstd/collections/mod.rs b/src/libstd/collections/mod.rs
index 4de442fd3a1..44613d77671 100644
--- a/src/libstd/collections/mod.rs
+++ b/src/libstd/collections/mod.rs
@@ -120,12 +120,10 @@
 //!
 //! For Sets, all operations have the cost of the equivalent Map operation.
 //!
-//! |          | get       | insert   | remove   | predecessor |
-//! |----------|-----------|----------|----------|-------------|
-//! | HashMap  | O(1)~     | O(1)~*   | O(1)~    | N/A         |
-//! | BTreeMap | O(log n)  | O(log n) | O(log n) | O(log n)    |
-//!
-//! Note that BTreeMap's precise performance depends on the value of B.
+//! |          | get       | insert   | remove   | predecessor | append |
+//! |----------|-----------|----------|----------|-------------|--------|
+//! | HashMap  | O(1)~     | O(1)~*   | O(1)~    | N/A         | N/A    |
+//! | BTreeMap | O(log n)  | O(log n) | O(log n) | O(log n)    | O(n+m) |
 //!
 //! # Correct and Efficient Usage of Collections
 //!