about summary refs log tree commit diff
diff options
context:
space:
mode:
authorCheng XU <git@xuc.me>2021-08-28 16:48:45 -0700
committerCheng XU <git@xuc.me>2021-08-28 17:18:50 -0700
commitcf814d60f82723e5965763859c51b3e7bd885b9b (patch)
tree3076d1accc7df88f4ad350dc57d8fea3ccdd1f43
parent6a6885c6bd1d44969ced14ab7f3ea9d543bf14a2 (diff)
downloadrust-cf814d60f82723e5965763859c51b3e7bd885b9b.tar.gz
rust-cf814d60f82723e5965763859c51b3e7bd885b9b.zip
BTreeMap::from_iter: use bulk building to improve the performance
Bulk building is a common technique to increase the performance of
building a fresh btree map. Instead of inserting items one-by-one,
we sort all the items beforehand then create the BtreeMap in bulk.
-rw-r--r--library/alloc/src/collections/btree/dedup_sorted_iter.rs47
-rw-r--r--library/alloc/src/collections/btree/map.rs36
-rw-r--r--library/alloc/src/collections/btree/mod.rs1
3 files changed, 79 insertions, 5 deletions
diff --git a/library/alloc/src/collections/btree/dedup_sorted_iter.rs b/library/alloc/src/collections/btree/dedup_sorted_iter.rs
new file mode 100644
index 00000000000..60bf83b8387
--- /dev/null
+++ b/library/alloc/src/collections/btree/dedup_sorted_iter.rs
@@ -0,0 +1,47 @@
+use core::iter::Peekable;
+
+/// A iterator for deduping the key of a sorted iterator.
+/// When encountering the duplicated key, only the last key-value pair is yielded.
+///
+/// Used by [`BTreeMap::bulk_build_from_sorted_iter`].
+pub struct DedupSortedIter<K, V, I>
+where
+    I: Iterator<Item = (K, V)>,
+{
+    iter: Peekable<I>,
+}
+
+impl<K, V, I> DedupSortedIter<K, V, I>
+where
+    I: Iterator<Item = (K, V)>,
+{
+    pub fn new(iter: I) -> Self {
+        Self { iter: iter.peekable() }
+    }
+}
+
+impl<K, V, I> Iterator for DedupSortedIter<K, V, I>
+where
+    K: Eq,
+    I: Iterator<Item = (K, V)>,
+{
+    type Item = (K, V);
+
+    fn next(&mut self) -> Option<(K, V)> {
+        loop {
+            let next = match self.iter.next() {
+                Some(next) => next,
+                None => return None,
+            };
+
+            let peeked = match self.iter.peek() {
+                Some(peeked) => peeked,
+                None => return Some(next),
+            };
+
+            if next.0 != peeked.0 {
+                return Some(next);
+            }
+        }
+    }
+}
diff --git a/library/alloc/src/collections/btree/map.rs b/library/alloc/src/collections/btree/map.rs
index 4b649e43371..5e60851aec8 100644
--- a/library/alloc/src/collections/btree/map.rs
+++ b/library/alloc/src/collections/btree/map.rs
@@ -1,3 +1,4 @@
+use crate::vec::Vec;
 use core::borrow::Borrow;
 use core::cmp::Ordering;
 use core::fmt::{self, Debug};
@@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds};
 use core::ptr;
 
 use super::borrow::DormantMutRef;
+use super::dedup_sorted_iter::DedupSortedIter;
 use super::navigate::{LazyLeafRange, LeafRange};
 use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root};
 use super::search::SearchResult::*;
@@ -1290,6 +1292,18 @@ impl<K, V> BTreeMap<K, V> {
     pub fn into_values(self) -> IntoValues<K, V> {
         IntoValues { inner: self.into_iter() }
     }
+
+    /// Makes a `BTreeMap` from a sorted iterator.
+    pub(crate) fn bulk_build_from_sorted_iter<I>(iter: I) -> Self
+    where
+        K: Ord,
+        I: Iterator<Item = (K, V)>,
+    {
+        let mut root = Root::new();
+        let mut length = 0;
+        root.bulk_push(DedupSortedIter::new(iter), &mut length);
+        BTreeMap { root: Some(root), length }
+    }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -1914,9 +1928,15 @@ impl<K, V> FusedIterator for RangeMut<'_, K, V> {}
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<K: Ord, V> FromIterator<(K, V)> for BTreeMap<K, V> {
     fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> BTreeMap<K, V> {
-        let mut map = BTreeMap::new();
-        map.extend(iter);
-        map
+        let mut inputs: Vec<_> = iter.into_iter().collect();
+
+        if inputs.is_empty() {
+            return BTreeMap::new();
+        }
+
+        // use stable sort to preserve the insertion order.
+        inputs.sort_by(|a, b| a.0.cmp(&b.0));
+        BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter())
     }
 }
 
@@ -2025,8 +2045,14 @@ impl<K: Ord, V, const N: usize> From<[(K, V); N]> for BTreeMap<K, V> {
     /// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into();
     /// assert_eq!(map1, map2);
     /// ```
-    fn from(arr: [(K, V); N]) -> Self {
-        core::array::IntoIter::new(arr).collect()
+    fn from(mut arr: [(K, V); N]) -> Self {
+        if N == 0 {
+            return BTreeMap::new();
+        }
+
+        // use stable sort to preserve the insertion order.
+        arr.sort_by(|a, b| a.0.cmp(&b.0));
+        BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr))
     }
 }
 
diff --git a/library/alloc/src/collections/btree/mod.rs b/library/alloc/src/collections/btree/mod.rs
index f74172c7d97..9571b3d594d 100644
--- a/library/alloc/src/collections/btree/mod.rs
+++ b/library/alloc/src/collections/btree/mod.rs
@@ -1,5 +1,6 @@
 mod append;
 mod borrow;
+mod dedup_sorted_iter;
 mod fix;
 pub mod map;
 mod mem;