about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-09-07 02:24:11 +0000
committerbors <bors@rust-lang.org>2021-09-07 02:24:11 +0000
commitffaf857045f4f4d8bb563e0a5077f9b065f42916 (patch)
treec4ddda3dc84bfd079a01e805fe3b865a04698cab
parent11bbb5231349a0a144d86d5c0c21061a06d1969d (diff)
parenta03287bbf765ce7ac0e2ae9e64d8ade168ece301 (diff)
downloadrust-ffaf857045f4f4d8bb563e0a5077f9b065f42916.tar.gz
rust-ffaf857045f4f4d8bb563e0a5077f9b065f42916.zip
Auto merge of #88448 - xu-cheng:btree-blk-build, r=Mark-Simulacrum
BTreeMap/BTreeSet::from_iter: use bulk building to improve the performance

Bulk building is a common technique to increase the performance of building a fresh btree map. Instead of inserting items one-by-one, we sort all the items beforehand then create the BtreeMap in bulk.

Benchmark
```
./x.py bench library/alloc --test-args btree::map::from_iter
```

* Before
```
test btree::map::from_iter_rand_100                      ... bench:       3,694 ns/iter (+/- 840)
test btree::map::from_iter_rand_10_000                   ... bench:   1,033,446 ns/iter (+/- 192,950)
test btree::map::from_iter_seq_100                       ... bench:       5,689 ns/iter (+/- 1,259)
test btree::map::from_iter_seq_10_000                    ... bench:     861,033 ns/iter (+/- 118,815)
```

* After
```
test btree::map::from_iter_rand_100                      ... bench:       3,033 ns/iter (+/- 707)
test btree::map::from_iter_rand_10_000                   ... bench:     775,958 ns/iter (+/- 105,152)
test btree::map::from_iter_seq_100                       ... bench:       2,969 ns/iter (+/- 336)
test btree::map::from_iter_seq_10_000                    ... bench:     258,292 ns/iter (+/- 29,364)
```
-rw-r--r--library/alloc/benches/btree/map.rs50
-rw-r--r--library/alloc/src/collections/btree/dedup_sorted_iter.rs47
-rw-r--r--library/alloc/src/collections/btree/map.rs36
-rw-r--r--library/alloc/src/collections/btree/mod.rs1
-rw-r--r--library/alloc/src/collections/btree/set.rs27
5 files changed, 151 insertions, 10 deletions
diff --git a/library/alloc/benches/btree/map.rs b/library/alloc/benches/btree/map.rs
index 920a5ca7db0..c304f748847 100644
--- a/library/alloc/benches/btree/map.rs
+++ b/library/alloc/benches/btree/map.rs
@@ -54,6 +54,50 @@ macro_rules! map_insert_seq_bench {
     };
 }
 
+macro_rules! map_from_iter_rand_bench {
+    ($name: ident, $n: expr, $map: ident) => {
+        #[bench]
+        pub fn $name(b: &mut Bencher) {
+            let n: usize = $n;
+            // setup
+            let mut rng = thread_rng();
+            let mut vec = Vec::with_capacity(n);
+
+            for _ in 0..n {
+                let i = rng.gen::<usize>() % n;
+                vec.push((i, i));
+            }
+
+            // measure
+            b.iter(|| {
+                let map: $map<_, _> = vec.iter().copied().collect();
+                black_box(map);
+            });
+        }
+    };
+}
+
+macro_rules! map_from_iter_seq_bench {
+    ($name: ident, $n: expr, $map: ident) => {
+        #[bench]
+        pub fn $name(b: &mut Bencher) {
+            let n: usize = $n;
+            // setup
+            let mut vec = Vec::with_capacity(n);
+
+            for i in 0..n {
+                vec.push((i, i));
+            }
+
+            // measure
+            b.iter(|| {
+                let map: $map<_, _> = vec.iter().copied().collect();
+                black_box(map);
+            });
+        }
+    };
+}
+
 macro_rules! map_find_rand_bench {
     ($name: ident, $n: expr, $map: ident) => {
         #[bench]
@@ -111,6 +155,12 @@ map_insert_rand_bench! {insert_rand_10_000, 10_000, BTreeMap}
 map_insert_seq_bench! {insert_seq_100,    100,    BTreeMap}
 map_insert_seq_bench! {insert_seq_10_000, 10_000, BTreeMap}
 
+map_from_iter_rand_bench! {from_iter_rand_100,    100,    BTreeMap}
+map_from_iter_rand_bench! {from_iter_rand_10_000, 10_000, BTreeMap}
+
+map_from_iter_seq_bench! {from_iter_seq_100,    100,    BTreeMap}
+map_from_iter_seq_bench! {from_iter_seq_10_000, 10_000, BTreeMap}
+
 map_find_rand_bench! {find_rand_100,    100,    BTreeMap}
 map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap}
 
diff --git a/library/alloc/src/collections/btree/dedup_sorted_iter.rs b/library/alloc/src/collections/btree/dedup_sorted_iter.rs
new file mode 100644
index 00000000000..60bf83b8387
--- /dev/null
+++ b/library/alloc/src/collections/btree/dedup_sorted_iter.rs
@@ -0,0 +1,47 @@
+use core::iter::Peekable;
+
+/// A iterator for deduping the key of a sorted iterator.
+/// When encountering the duplicated key, only the last key-value pair is yielded.
+///
+/// Used by [`BTreeMap::bulk_build_from_sorted_iter`].
+pub struct DedupSortedIter<K, V, I>
+where
+    I: Iterator<Item = (K, V)>,
+{
+    iter: Peekable<I>,
+}
+
+impl<K, V, I> DedupSortedIter<K, V, I>
+where
+    I: Iterator<Item = (K, V)>,
+{
+    pub fn new(iter: I) -> Self {
+        Self { iter: iter.peekable() }
+    }
+}
+
+impl<K, V, I> Iterator for DedupSortedIter<K, V, I>
+where
+    K: Eq,
+    I: Iterator<Item = (K, V)>,
+{
+    type Item = (K, V);
+
+    fn next(&mut self) -> Option<(K, V)> {
+        loop {
+            let next = match self.iter.next() {
+                Some(next) => next,
+                None => return None,
+            };
+
+            let peeked = match self.iter.peek() {
+                Some(peeked) => peeked,
+                None => return Some(next),
+            };
+
+            if next.0 != peeked.0 {
+                return Some(next);
+            }
+        }
+    }
+}
diff --git a/library/alloc/src/collections/btree/map.rs b/library/alloc/src/collections/btree/map.rs
index 70a838a35f9..501a604e7f7 100644
--- a/library/alloc/src/collections/btree/map.rs
+++ b/library/alloc/src/collections/btree/map.rs
@@ -1,3 +1,4 @@
+use crate::vec::Vec;
 use core::borrow::Borrow;
 use core::cmp::Ordering;
 use core::fmt::{self, Debug};
@@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds};
 use core::ptr;
 
 use super::borrow::DormantMutRef;
+use super::dedup_sorted_iter::DedupSortedIter;
 use super::navigate::{LazyLeafRange, LeafRange};
 use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root};
 use super::search::SearchResult::*;
@@ -1285,6 +1287,18 @@ impl<K, V> BTreeMap<K, V> {
     pub fn into_values(self) -> IntoValues<K, V> {
         IntoValues { inner: self.into_iter() }
     }
+
+    /// Makes a `BTreeMap` from a sorted iterator.
+    pub(crate) fn bulk_build_from_sorted_iter<I>(iter: I) -> Self
+    where
+        K: Ord,
+        I: Iterator<Item = (K, V)>,
+    {
+        let mut root = Root::new();
+        let mut length = 0;
+        root.bulk_push(DedupSortedIter::new(iter), &mut length);
+        BTreeMap { root: Some(root), length }
+    }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -1909,9 +1923,15 @@ impl<K, V> FusedIterator for RangeMut<'_, K, V> {}
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<K: Ord, V> FromIterator<(K, V)> for BTreeMap<K, V> {
     fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> BTreeMap<K, V> {
-        let mut map = BTreeMap::new();
-        map.extend(iter);
-        map
+        let mut inputs: Vec<_> = iter.into_iter().collect();
+
+        if inputs.is_empty() {
+            return BTreeMap::new();
+        }
+
+        // use stable sort to preserve the insertion order.
+        inputs.sort_by(|a, b| a.0.cmp(&b.0));
+        BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter())
     }
 }
 
@@ -2020,8 +2040,14 @@ impl<K: Ord, V, const N: usize> From<[(K, V); N]> for BTreeMap<K, V> {
     /// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into();
     /// assert_eq!(map1, map2);
     /// ```
-    fn from(arr: [(K, V); N]) -> Self {
-        core::array::IntoIter::new(arr).collect()
+    fn from(mut arr: [(K, V); N]) -> Self {
+        if N == 0 {
+            return BTreeMap::new();
+        }
+
+        // use stable sort to preserve the insertion order.
+        arr.sort_by(|a, b| a.0.cmp(&b.0));
+        BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr))
     }
 }
 
diff --git a/library/alloc/src/collections/btree/mod.rs b/library/alloc/src/collections/btree/mod.rs
index f74172c7d97..9571b3d594d 100644
--- a/library/alloc/src/collections/btree/mod.rs
+++ b/library/alloc/src/collections/btree/mod.rs
@@ -1,5 +1,6 @@
 mod append;
 mod borrow;
+mod dedup_sorted_iter;
 mod fix;
 pub mod map;
 mod mem;
diff --git a/library/alloc/src/collections/btree/set.rs b/library/alloc/src/collections/btree/set.rs
index ff0db22e0cc..c664e201aec 100644
--- a/library/alloc/src/collections/btree/set.rs
+++ b/library/alloc/src/collections/btree/set.rs
@@ -1,6 +1,7 @@
 // This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface
 // to TreeMap
 
+use crate::vec::Vec;
 use core::borrow::Borrow;
 use core::cmp::Ordering::{Equal, Greater, Less};
 use core::cmp::{max, min};
@@ -1056,9 +1057,17 @@ impl<T> BTreeSet<T> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: Ord> FromIterator<T> for BTreeSet<T> {
     fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> BTreeSet<T> {
-        let mut set = BTreeSet::new();
-        set.extend(iter);
-        set
+        let mut inputs: Vec<_> = iter.into_iter().collect();
+
+        if inputs.is_empty() {
+            return BTreeSet::new();
+        }
+
+        // use stable sort to preserve the insertion order.
+        inputs.sort();
+        let iter = inputs.into_iter().map(|k| (k, ()));
+        let map = BTreeMap::bulk_build_from_sorted_iter(iter);
+        BTreeSet { map }
     }
 }
 
@@ -1071,8 +1080,16 @@ impl<T: Ord, const N: usize> From<[T; N]> for BTreeSet<T> {
     /// let set2: BTreeSet<_> = [1, 2, 3, 4].into();
     /// assert_eq!(set1, set2);
     /// ```
-    fn from(arr: [T; N]) -> Self {
-        core::array::IntoIter::new(arr).collect()
+    fn from(mut arr: [T; N]) -> Self {
+        if N == 0 {
+            return BTreeSet::new();
+        }
+
+        // use stable sort to preserve the insertion order.
+        arr.sort();
+        let iter = core::array::IntoIter::new(arr).map(|k| (k, ()));
+        let map = BTreeMap::bulk_build_from_sorted_iter(iter);
+        BTreeSet { map }
     }
 }